summaryrefslogtreecommitdiff
path: root/gmp/mpn/powerpc32
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/powerpc32')
-rw-r--r--gmp/mpn/powerpc32/750/com.asm79
-rw-r--r--gmp/mpn/powerpc32/750/gmp-mparam.h192
-rw-r--r--gmp/mpn/powerpc32/750/lshift.asm155
-rw-r--r--gmp/mpn/powerpc32/750/rshift.asm153
-rw-r--r--gmp/mpn/powerpc32/README180
-rw-r--r--gmp/mpn/powerpc32/addlsh1_n.asm100
-rw-r--r--gmp/mpn/powerpc32/addmul_1.asm155
-rw-r--r--gmp/mpn/powerpc32/aix.m482
-rw-r--r--gmp/mpn/powerpc32/aors_n.asm157
-rw-r--r--gmp/mpn/powerpc32/bdiv_dbm1c.asm131
-rw-r--r--gmp/mpn/powerpc32/darwin.m491
-rw-r--r--gmp/mpn/powerpc32/diveby3.asm93
-rw-r--r--gmp/mpn/powerpc32/divrem_2.asm182
-rw-r--r--gmp/mpn/powerpc32/eabi.m486
-rw-r--r--gmp/mpn/powerpc32/elf.m497
-rw-r--r--gmp/mpn/powerpc32/gmp-mparam.h217
-rw-r--r--gmp/mpn/powerpc32/invert_limb.asm142
-rw-r--r--gmp/mpn/powerpc32/lshift.asm166
-rw-r--r--gmp/mpn/powerpc32/lshiftc.asm168
-rw-r--r--gmp/mpn/powerpc32/mod_34lsub1.asm145
-rw-r--r--gmp/mpn/powerpc32/mode1o.asm127
-rw-r--r--gmp/mpn/powerpc32/mul_1.asm101
-rw-r--r--gmp/mpn/powerpc32/p3-p7/aors_n.asm186
-rw-r--r--gmp/mpn/powerpc32/p3/gmp-mparam.h155
-rw-r--r--gmp/mpn/powerpc32/p4/gmp-mparam.h204
-rw-r--r--gmp/mpn/powerpc32/p5/gmp-mparam.h156
-rw-r--r--gmp/mpn/powerpc32/p6/gmp-mparam.h165
-rw-r--r--gmp/mpn/powerpc32/p7/gmp-mparam.h159
-rw-r--r--gmp/mpn/powerpc32/powerpc-defs.m4104
-rw-r--r--gmp/mpn/powerpc32/rshift.asm164
-rw-r--r--gmp/mpn/powerpc32/sec_tabselect.asm141
-rw-r--r--gmp/mpn/powerpc32/sqr_diag_addlsh1.asm80
-rw-r--r--gmp/mpn/powerpc32/sublsh1_n.asm101
-rw-r--r--gmp/mpn/powerpc32/submul_1.asm147
-rw-r--r--gmp/mpn/powerpc32/umul.asm50
-rw-r--r--gmp/mpn/powerpc32/vmx/copyd.asm203
-rw-r--r--gmp/mpn/powerpc32/vmx/copyi.asm198
-rw-r--r--gmp/mpn/powerpc32/vmx/logops_n.asm310
-rw-r--r--gmp/mpn/powerpc32/vmx/mod_34lsub1.asm386
-rw-r--r--gmp/mpn/powerpc32/vmx/popcount.asm34
40 files changed, 5942 insertions, 0 deletions
diff --git a/gmp/mpn/powerpc32/750/com.asm b/gmp/mpn/powerpc32/750/com.asm
new file mode 100644
index 0000000000..1b8b574b9c
--- /dev/null
+++ b/gmp/mpn/powerpc32/750/com.asm
@@ -0,0 +1,79 @@
+dnl PowerPC 750 mpn_com -- mpn bitwise one's complement
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 2.0
+C 7400,7410 (G4): 2.0
+C 744x,745x (G4+): 3.0
+
+C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C This loop form is necessary for the claimed speed.
+
+ASM_START()
+PROLOGUE(mpn_com)
+
+ C r3 dst
+ C r4 src
+ C r5 size
+
+ mtctr r5 C size
+ lwz r5, 0(r4) C src low limb
+
+ sub r4, r4, r3 C src-dst
+ subi r3, r3, 4 C dst-4
+
+ addi r4, r4, 8 C src-dst+8
+ bdz L(one)
+
+L(top):
+ C r3 &dst[i-1]
+ C r4 src-dst
+ C r5 src[i]
+ C r6 scratch
+
+ not r6, r5 C ~src[i]
+ lwzx r5, r4,r3 C src[i+1]
+
+ stwu r6, 4(r3) C dst[i]
+ bdnz L(top)
+
+L(one):
+ not r6, r5
+
+ stw r6, 4(r3) C dst[size-1]
+ blr
+
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/750/gmp-mparam.h b/gmp/mpn/powerpc32/750/gmp-mparam.h
new file mode 100644
index 0000000000..3667e8596d
--- /dev/null
+++ b/gmp/mpn/powerpc32/750/gmp-mparam.h
@@ -0,0 +1,192 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2002, 2004, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+
+/* This file is used for 75x (G3) and for 7400/7410 (G4), both which have
+ much slow multiply instructions. */
+
+/* 450 MHz PPC 7400 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 11
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 38
+#define USE_PREINV_DIVREM_1 1
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 38
+#define MUL_TOOM44_THRESHOLD 99
+#define MUL_TOOM6H_THRESHOLD 141
+#define MUL_TOOM8H_THRESHOLD 212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 57
+#define SQR_TOOM4_THRESHOLD 142
+#define SQR_TOOM6_THRESHOLD 173
+#define SQR_TOOM8_THRESHOLD 309
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 11
+
+#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 220, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 23, 9}, \
+ { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
+ { 39, 8}, { 23, 9}, { 15, 8}, { 39, 9}, \
+ { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
+ { 67, 9}, { 55,10}, { 31, 9}, { 63, 8}, \
+ { 127, 7}, { 255, 9}, { 71, 8}, { 143, 7}, \
+ { 287, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 127, 8}, { 255, 9}, \
+ { 143, 8}, { 287,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 175, 8}, { 351, 7}, { 703,10}, \
+ { 95, 9}, { 191, 8}, { 383, 9}, { 207,10}, \
+ { 111,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351, 8}, { 703,11}, \
+ { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \
+ { 415, 8}, { 831,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 351, 9}, \
+ { 703, 8}, { 1407,11}, { 191,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447, 9}, { 895,12}, \
+ { 127,11}, { 255,10}, { 543,11}, { 287,10}, \
+ { 575,11}, { 351,10}, { 703, 9}, { 1407,12}, \
+ { 191,11}, { 415,10}, { 831,11}, { 447,10}, \
+ { 895,13}, { 127,12}, { 255,11}, { 543,10}, \
+ { 1087,11}, { 575,12}, { 319,11}, { 703,10}, \
+ { 1407,12}, { 383,11}, { 831,12}, { 447,11}, \
+ { 895,10}, { 1791,11}, { 959,13}, { 255,12}, \
+ { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \
+ { 703,11}, { 1407,13}, { 383,12}, { 895,11}, \
+ { 1791,12}, { 959,14}, { 255,13}, { 511,12}, \
+ { 1215,13}, { 639,12}, { 1407,13}, { 895,12}, \
+ { 1919,14}, { 511,13}, { 1023,12}, { 2047,13}, \
+ { 1151,12}, { 2303,13}, { 1407,14}, { 767,13}, \
+ { 1919,10}, { 15359,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 154
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 184 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 184, 5}, { 6, 4}, { 13, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 31, 8}, { 19, 7}, { 39, 8}, { 27, 9}, \
+ { 15, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \
+ { 127, 7}, { 255, 9}, { 71, 8}, { 143, 7}, \
+ { 287, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 143, 8}, { 287, 7}, { 575,10}, \
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175, 8}, \
+ { 351,10}, { 95, 9}, { 191, 8}, { 383, 9}, \
+ { 207,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 143, 9}, { 287, 8}, { 575,10}, \
+ { 159, 9}, { 319,10}, { 175, 9}, { 351,11}, \
+ { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \
+ { 415, 8}, { 831,10}, { 223,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,12}, { 127,11}, { 255,10}, \
+ { 511,11}, { 287,10}, { 575,11}, { 319,10}, \
+ { 639,11}, { 351,10}, { 703, 9}, { 1407,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,11}, { 447,10}, { 895,13}, { 127,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 575,12}, \
+ { 319,11}, { 703,10}, { 1407,12}, { 383,11}, \
+ { 831,12}, { 447,11}, { 895,10}, { 1791,11}, \
+ { 959,13}, { 255,12}, { 511,11}, { 1023,12}, \
+ { 575,11}, { 1215,12}, { 703,11}, { 1407,13}, \
+ { 383,12}, { 895,11}, { 1791,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1215,13}, { 639,12}, \
+ { 1471,13}, { 767,12}, { 1535,13}, { 895,12}, \
+ { 1919,14}, { 511,13}, { 1151,12}, { 2431,13}, \
+ { 1407,14}, { 767,13}, { 1919,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 152
+#define SQR_FFT_THRESHOLD 1728
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 31
+#define DC_DIVAPPR_Q_THRESHOLD 108
+#define DC_BDIV_QR_THRESHOLD 35
+#define DC_BDIV_Q_THRESHOLD 88
+
+#define INV_MULMOD_BNM1_THRESHOLD 42
+#define INV_NEWTON_THRESHOLD 149
+#define INV_APPR_THRESHOLD 125
+
+#define BINV_NEWTON_THRESHOLD 156
+#define REDC_1_TO_REDC_N_THRESHOLD 39
+
+#define MU_DIV_QR_THRESHOLD 807
+#define MU_DIVAPPR_Q_THRESHOLD 807
+#define MUPI_DIV_QR_THRESHOLD 66
+#define MU_BDIV_QR_THRESHOLD 667
+#define MU_BDIV_Q_THRESHOLD 807
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 87
+#define GCD_DC_THRESHOLD 233
+#define GCDEXT_DC_THRESHOLD 198
+#define JACOBI_BASE_METHOD 1
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 390
+#define SET_STR_PRECOMPUTE_THRESHOLD 814
diff --git a/gmp/mpn/powerpc32/750/lshift.asm b/gmp/mpn/powerpc32/750/lshift.asm
new file mode 100644
index 0000000000..3a1c1a7212
--- /dev/null
+++ b/gmp/mpn/powerpc32/750/lshift.asm
@@ -0,0 +1,155 @@
+dnl PowerPC 750 mpn_lshift -- mpn left shift.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 750: 3.0
+C 7400: 3.0
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+
+ C r3 dst
+ C r4 src
+ C r5 size
+ C r6 shift
+
+ mtctr r5 C size
+ slwi r5, r5, 2 C 4*size
+
+ subfic r7, r6, 32 C 32-shift
+ add r4, r4, r5 C &src[size]
+
+ add r5, r3, r5 C &dst[size]
+ lwz r8, -4(r4) C src[size-1]
+ bdz L(one)
+
+ lwzu r9, -8(r4) C src[size-2]
+
+ srw r3, r8, r7 C return value
+ slw r8, r8, r6 C src[size-1] << shift
+ bdz L(two)
+
+
+L(top):
+ C r3 return value
+ C r4 src, incrementing
+ C r5 dst, incrementing
+ C r6 lshift
+ C r7 32-shift
+ C r8 src[i+1] << shift
+ C r9 src[i]
+ C r10
+
+ lwzu r10, -4(r4)
+ srw r11, r9, r7
+
+ or r8, r8, r11
+ stwu r8, -4(r5)
+
+ slw r8, r9, r6
+ bdz L(odd)
+
+ C r8 src[i+1] << shift
+ C r9
+ C r10 src[i]
+
+ lwzu r9, -4(r4)
+ srw r11, r10, r7
+
+ or r8, r8, r11
+ stwu r8, -4(r5)
+
+ slw r8, r10, r6
+ bdnz L(top)
+
+
+L(two):
+ C r3 return value
+ C r4
+ C r5 &dst[2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[1] << shift
+ C r9 src[0]
+ C r10
+
+ srw r11, r9, r7
+ slw r12, r9, r6 C src[0] << shift
+
+ or r8, r8, r11
+ stw r12, -8(r5) C dst[0]
+
+ stw r8, -4(r5) C dst[1]
+ blr
+
+
+L(odd):
+ C r3 return value
+ C r4
+ C r5 &dst[2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[1] << shift
+ C r9
+ C r10 src[0]
+
+ srw r11, r10, r7
+ slw r12, r10, r6
+
+ or r8, r8, r11
+ stw r12, -8(r5) C dst[0]
+
+ stw r8, -4(r5) C dst[1]
+ blr
+
+
+L(one):
+ C r5 &dst[1]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[0]
+
+ srw r3, r8, r7 C return value
+ slw r8, r8, r6 C src[size-1] << shift
+
+ stw r8, -4(r5) C dst[0]
+ blr
+
+EPILOGUE(mpn_lshift)
diff --git a/gmp/mpn/powerpc32/750/rshift.asm b/gmp/mpn/powerpc32/750/rshift.asm
new file mode 100644
index 0000000000..4825fee618
--- /dev/null
+++ b/gmp/mpn/powerpc32/750/rshift.asm
@@ -0,0 +1,153 @@
+dnl PowerPC 750 mpn_rshift -- mpn right shift.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 750: 3.0
+C 7400: 3.0
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+
+ C r3 dst
+ C r4 src
+ C r5 size
+ C r6 shift
+
+ mtctr r5 C size
+ lwz r8, 0(r4) C src[0]
+
+ subfic r7, r6, 32 C 32-shift
+ addi r5, r3, -4 C dst-4
+
+ slw r3, r8, r7 C return value
+ bdz L(one)
+
+ lwzu r9, 4(r4) C src[1]
+ srw r8, r8, r6 C src[0] >> shift
+ bdz L(two)
+
+
+L(top):
+ C r3 return value
+ C r4 src, incrementing
+ C r5 dst, incrementing
+ C r6 shift
+ C r7 32-shift
+ C r8 src[i-1] >> shift
+ C r9 src[i]
+ C r10
+
+ lwzu r10, 4(r4)
+ slw r11, r9, r7
+
+ or r8, r8, r11
+ stwu r8, 4(r5)
+
+ srw r8, r9, r6
+ bdz L(odd)
+
+ C r8 src[i-1] >> shift
+ C r9
+ C r10 src[i]
+
+ lwzu r9, 4(r4)
+ slw r11, r10, r7
+
+ or r8, r8, r11
+ stwu r8, 4(r5)
+
+ srw r8, r10, r6
+ bdnz L(top)
+
+
+L(two):
+ C r3 return value
+ C r4
+ C r5 &dst[size-2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[size-2] >> shift
+ C r9 src[size-1]
+ C r10
+
+ slw r11, r9, r7
+ srw r12, r9, r6 C src[size-1] >> shift
+
+ or r8, r8, r11
+ stw r12, 8(r5) C dst[size-1]
+
+ stw r8, 4(r5) C dst[size-2]
+ blr
+
+
+L(odd):
+ C r3 return value
+ C r4
+ C r5 &dst[size-2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[size-2] >> shift
+ C r9
+ C r10 src[size-1]
+
+ slw r11, r10, r7
+ srw r12, r10, r6
+
+ or r8, r8, r11
+ stw r12, 8(r5) C dst[size-1]
+
+ stw r8, 4(r5) C dst[size-2]
+ blr
+
+
+L(one):
+ C r3 return value
+ C r4
+ C r5 dst-4
+ C r6 shift
+ C r7
+ C r8 src[0]
+
+ srw r8, r8, r6
+
+ stw r8, 4(r5) C dst[0]
+ blr
+
+EPILOGUE(mpn_rshift)
diff --git a/gmp/mpn/powerpc32/README b/gmp/mpn/powerpc32/README
new file mode 100644
index 0000000000..887e78b290
--- /dev/null
+++ b/gmp/mpn/powerpc32/README
@@ -0,0 +1,180 @@
+Copyright 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+
+ POWERPC 32-BIT MPN SUBROUTINES
+
+
+This directory contains mpn functions for various 32-bit PowerPC chips.
+
+
+CODE ORGANIZATION
+
+ directory used for
+ ================================================
+ powerpc generic, 604, 604e, 744x, 745x
+ powerpc/750 740, 750, 7400, 7410
+
+
+The top-level powerpc directory is currently mostly aimed at 604/604e but
+should be reasonable on all powerpcs.
+
+
+
+STATUS
+
+The code is quite well optimized for the 604e, other chips have had less
+attention.
+
+Altivec SIMD available in 74xx might hold some promise, but unfortunately
+GMP only guarantees 32-bit data alignment, so there's lots of fiddling
+around with partial operations at the start and end of limb vectors. A
+128-bit limb would be a novel idea, but is unlikely to be practical, since
+it would have to work with ordinary +, -, * etc in the C code.
+
+Also, Altivec isn't very well suited for the GMP multiplication needs.
+Using floating-point based multiplication has much better better performance
+potential for all current powerpcs, both the ones with slow integer multiply
+units (603, 740, 750, 7400, 7410) and those with fast (604, 604e, 744x,
+745x). This is because all powerpcs do some level of pipelining in the FPU:
+
+603 and 750 can sustain one fmadd every 2nd cycle.
+604 and 604e can sustain one fmadd per cycle.
+7400 and 7410 can sustain 3 fmadd in 4 cycles.
+744x and 745x can sustain 4 fmadd in 5 cycles.
+
+
+
+REGISTER NAMES
+
+The normal powerpc convention is to give registers as plain numbers, like
+"mtctr 6", but on Apple MacOS X (powerpc*-*-rhapsody* and
+powerpc*-*-darwin*) the assembler demands an "r" like "mtctr r6". Note
+however when register 0 in an instruction means a literal zero the "r" is
+omitted, for instance "lwzx r6,0,r7".
+
+The GMP code uses the "r" forms, powerpc-defs.m4 transforms them to plain
+numbers according to what GMP_ASM_POWERPC_R_REGISTERS finds is needed.
+(Note that this style isn't fully general, as the identifier r4 and the
+register r4 will not be distinguishable on some systems. However, this is
+not a problem for the limited GMP assembly usage.)
+
+
+
+GLOBAL REFERENCES
+
+Linux non-PIC
+ lis 9, __gmp_binvert_limb_table@ha
+ rlwinm 11, 5, 31, 25, 31
+ la 9, __gmp_binvert_limb_table@l(9)
+ lbzx 11, 9, 11
+
+Linux PIC (FIXME)
+.LCL0:
+ .long .LCTOC1-.LCF0
+ bcl 20, 31, .LCF0
+.LCF0:
+ mflr 30
+ lwz 7, .LCL0-.LCF0(30)
+ add 30, 7, 30
+ lwz 11, .LC0-.LCTOC1(30)
+ rlwinm 3, 5, 31, 25, 31
+ lbzx 7, 11, 3
+
+AIX (always PIC)
+LC..0:
+ .tc __gmp_binvert_limb_table[TC],__gmp_binvert_limb_table[RW]
+ lwz 9, LC..0(2)
+ rlwinm 0, 5, 31, 25, 31
+ lbzx 0, 9, 0
+
+Darwin (non-PIC)
+ lis r2, ha16(___gmp_binvert_limb_table)
+ rlwinm r9, r5, 31, 25, 31
+ la r2, lo16(___gmp_binvert_limb_table)(r2)
+ lbzx r0, r2, r9
+Darwin (PIC)
+ mflr r0
+ bcl 20, 31, L0001$pb
+L0001$pb:
+ mflr r7
+ mtlr r0
+ addis r2, r7, ha16(L___gmp_binvert_limb_table$non_lazy_ptr-L0001$pb)
+ rlwinm r9, r5, 31, 25, 31
+ lwz r2, lo16(L___gmp_binvert_limb_table$non_lazy_ptr-L0001$pb)(r2)
+ lbzx r0, r2, r9
+------
+ .non_lazy_symbol_pointer
+L___gmp_binvert_limb_table$non_lazy_ptr:
+ .indirect_symbol ___gmp_binvert_limb_table
+ .long 0
+ .subsections_via_symbols
+
+
+For GNU/Linux and Darwin, we might want to duplicate __gmp_binvert_limb_table
+into the text section in this file. We should thus be able to reach it like
+this:
+
+ blr L0
+L0: mflr r2
+ rlwinm r9, r5, 31, 25, 31
+ addi r9, r9, lo16(local_binvert_table-L0)
+ lbzx r0, r2, r9
+
+
+
+REFERENCES
+
+PowerPC Microprocessor Family: The Programming Environments for 32-bit
+Microprocessors, IBM document G522-0290-01, 2000.
+
+PowerPC 604e RISC Microprocessor User's Manual with Supplement for PowerPC
+604 Microprocessor, IBM document G552-0330-00, Freescale document
+MPC604EUM/AD, 3/1998.
+
+MPC7410/MPC7400 RISC Microprocessor User's Manual, Freescale document
+MPC7400UM/D, rev 1, 11/2002.
+
+MPC7450 RISC Microprocessor Family Reference Manual, Freescale document
+MPC7450UM, rev 5, 1/2005.
+
+The above are available online from
+
+ http://www.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC
+ http://www.freescale.com/PowerPC
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/gmp/mpn/powerpc32/addlsh1_n.asm b/gmp/mpn/powerpc32/addlsh1_n.asm
new file mode 100644
index 0000000000..71645c3ec3
--- /dev/null
+++ b/gmp/mpn/powerpc32/addlsh1_n.asm
@@ -0,0 +1,100 @@
+dnl PowerPC-32 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+
+dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 4.0
+C 75x (G3): 5.0
+C 7400,7410 (G4): 5.0
+C 744x,745x (G4+): 5.0
+C power4/ppc970: 4.25
+C power5: 5.0
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C vp r5
+C n r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+PROLOGUE(mpn_addlsh1_n)
+ mtctr r6 C copy n in ctr
+ addic r31, r31, 0 C clear cy
+
+ lwz v0, 0(vp) C load v limb
+ lwz u0, 0(up) C load u limb
+ addi up, up, -4 C update up
+ addi rp, rp, -4 C update rp
+ slwi s1, v0, 1
+ bdz L(end) C If done, skip loop
+
+L(loop):
+ lwz v1, 4(vp) C load v limb
+ adde s1, s1, u0 C add limbs with cy, set cy
+ srwi s0, v0, 31 C shift down previous v limb
+ stw s1, 4(rp) C store result limb
+ lwzu u0, 8(up) C load u limb and update up
+ rlwimi s0, v1, 1, 0,30 C left shift v limb and merge with prev v limb
+
+ bdz L(exit) C decrement ctr and exit if done
+
+ lwzu v0, 8(vp) C load v limb and update vp
+ adde s0, s0, u0 C add limbs with cy, set cy
+ srwi s1, v1, 31 C shift down previous v limb
+ stwu s0, 8(rp) C store result limb and update rp
+ lwz u0, 4(up) C load u limb
+ rlwimi s1, v0, 1, 0,30 C left shift v limb and merge with prev v limb
+
+ bdnz L(loop) C decrement ctr and loop back
+
+L(end): adde r7, s1, u0
+ srwi r4, v0, 31
+ stw r7, 4(rp) C store last result limb
+ addze r3, r4
+ blr
+L(exit):
+ adde r7, s0, u0
+ srwi r4, v1, 31
+ stw r7, 8(rp) C store last result limb
+ addze r3, r4
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/addmul_1.asm b/gmp/mpn/powerpc32/addmul_1.asm
new file mode 100644
index 0000000000..7f47ab2ce7
--- /dev/null
+++ b/gmp/mpn/powerpc32/addmul_1.asm
@@ -0,0 +1,155 @@
+dnl PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright 1995, 1997, 1998, 2000-2003, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 6.75
+C 75x (G3): 8.7-14.3
+C 7400,7410 (G4): 8.7-14.3
+C 744x,745x (G4+): 9.5
+C power4/ppc970: 6.25
+C power5: 6.25
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C vl r6
+
+C This is optimized for the PPC604. It has not been tuned for other
+C PowerPC processors.
+C
+C Loop Analysis for the 604:
+C 12 mem insn
+C 8 serializing insn
+C 8 int multiply
+C 25 int reg write
+C 9 int ops (8 of which serialize)
+C
+C The multiply insns need 16 cycles/4limb.
+C The integer register writes will need 13 cycles/4limb.
+C All-in-all, it should be possible to get to 4 or 5 cycles/limb on PPC604,
+C but that will require some clever FPNOPS and BNOPS for exact
+C issue control.
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ cmpwi cr0,r5,9 C more than 9 limbs?
+ bgt cr0,L(big) C branch if more than 9 limbs
+
+ mtctr r5
+ lwz r0,0(r4)
+ mullw r7,r0,r6
+ mulhwu r10,r0,r6
+ lwz r9,0(r3)
+ addc r8,r7,r9
+ addi r3,r3,-4
+ bdz L(end)
+L(loop):
+ lwzu r0,4(r4)
+ stwu r8,4(r3)
+ mullw r8,r0,r6
+ adde r7,r8,r10
+ mulhwu r10,r0,r6
+ lwz r9,4(r3)
+ addze r10,r10
+ addc r8,r7,r9
+ bdnz L(loop)
+L(end): stw r8,4(r3)
+ addze r3,r10
+ blr
+
+L(big): stmw r30,-32(r1)
+ addi r5,r5,-1
+ srwi r0,r5,2
+ mtctr r0
+
+ lwz r7,0(r4)
+ mullw r8,r7,r6
+ mulhwu r0,r7,r6
+ lwz r7,0(r3)
+ addc r8,r8,r7
+ stw r8,0(r3)
+
+L(loopU):
+ lwz r7,4(r4)
+ lwz r12,8(r4)
+ lwz r30,12(r4)
+ lwzu r31,16(r4)
+ mullw r8,r7,r6
+ mullw r9,r12,r6
+ mullw r10,r30,r6
+ mullw r11,r31,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ adde r9,r9,r0
+ mulhwu r0,r12,r6
+ lwz r12,8(r3)
+ adde r10,r10,r0
+ mulhwu r0,r30,r6
+ lwz r30,12(r3)
+ adde r11,r11,r0
+ mulhwu r0,r31,r6
+ lwz r31,16(r3)
+ addze r0,r0 C new cy_limb
+ addc r8,r8,r7
+ stw r8,4(r3)
+ adde r9,r9,r12
+ stw r9,8(r3)
+ adde r10,r10,r30
+ stw r10,12(r3)
+ adde r11,r11,r31
+ stwu r11,16(r3)
+ bdnz L(loopU)
+
+ andi. r31,r5,3
+ mtctr r31
+ beq cr0,L(endx)
+
+L(loopE):
+ lwzu r7,4(r4)
+ mullw r8,r7,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ addze r0,r0 C new cy_limb
+ addc r8,r8,r7
+ stwu r8,4(r3)
+ bdnz L(loopE)
+L(endx):
+ addze r3,r0
+ lmw r30,-32(r1)
+ blr
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp/mpn/powerpc32/aix.m4 b/gmp/mpn/powerpc32/aix.m4
new file mode 100644
index 0000000000..fde20200b2
--- /dev/null
+++ b/gmp/mpn/powerpc32/aix.m4
@@ -0,0 +1,82 @@
+divert(-1)
+dnl m4 macros for AIX 32-bit assembly.
+
+dnl Copyright 2000-2002, 2005, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+define(`ASM_START',
+` .toc')
+
+dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl Don't want ELF style .size in the epilogue.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+ `
+ .globl $1
+ .globl .$1
+ .csect [DS], 2
+$1:
+ .long .$1, TOC[tc0], 0
+ .csect [PR]
+ .align 2
+.$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+`')
+
+define(`TOC_ENTRY', `')
+
+define(`LEA',
+m4_assert_numargs(2)
+`define(`TOC_ENTRY',
+` .toc
+tc$2:
+ .tc $2[TC], $2')'
+` lwz $1, tc$2(2)')
+
+define(`EXTERN',
+m4_assert_numargs(1)
+` .globl $1')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+` .csect [RO], 3
+ ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1))
+
+define(`ASM_END', `TOC_ENTRY')
+
+divert
diff --git a/gmp/mpn/powerpc32/aors_n.asm b/gmp/mpn/powerpc32/aors_n.asm
new file mode 100644
index 0000000000..25ece0966e
--- /dev/null
+++ b/gmp/mpn/powerpc32/aors_n.asm
@@ -0,0 +1,157 @@
+dnl PowerPC-32 mpn_add_n and mpn_sub_n.
+
+dnl Copyright 2002, 2005, 2007 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: ? old: 3.25
+C 75x (G3): ? old: 3.5
+C 7400,7410 (G4): 3.25
+C 744x,745x (G4+): 4
+C POWER3/PPC630 2
+C POWER4/PPC970 2.4
+C POWER5 2.75
+C POWER6 40-140
+C POWER7 3
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n', `r6')
+define(`cy', `r7')
+
+ifdef(`OPERATION_add_n', `
+ define(ADCSBC, adde)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)
+ define(IFADD, `$1')
+ define(IFSUB, `')')
+ifdef(`OPERATION_sub_n', `
+ define(ADCSBC, subfe)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)
+ define(IFADD, `')
+ define(IFSUB, `$1')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+
+PROLOGUE(func_nc)
+IFADD(` addic r0, cy, -1') C set carry from argument
+IFSUB(` subfic r0, cy, 0') C set carry from argument
+ b L(ent)
+EPILOGUE()
+
+PROLOGUE(func)
+IFADD(` addic r0, n, 0') C clear carry
+IFSUB(` addic r0, n, -1') C set carry
+L(ent): andi. r0, n, 3
+ addi r3, r3, -12
+ addi n, n, 1
+ cmpwi cr7, r0, 2
+ srwi r0, n, 2
+ sub r4, r4, r3
+ sub r5, r5, r3
+ mtctr r0
+ bne cr0, L(n00)
+
+ lwzx r7, r4, r3 C n = 4, 8, 12, ...
+ lwzx r8, r5, r3
+ addi r3, r3, 4
+ lwzx r9, r4, r3
+ ADCSBC r7, r8, r7
+ lwzx r10, r5, r3
+ addi r3, r3, 4
+ b L(00)
+
+L(n00): bge cr7, L(n01)
+ cmpwi cr0, r0, 0 C n = 1, 5, 9, 13, ...
+ lwzx r0, r4, r3
+ lwzx r6, r5, r3
+ addi r3, r3, 4
+ ADCSBC r0, r6, r0
+ ble L(ret)
+L(gt1): lwzx r7, r4, r3
+ lwzx r8, r5, r3
+ addi r3, r3, 4
+ b L(01)
+
+L(n10):
+ lwzx r9, r4, r3 C n = 3, 7, 11, 15, ...
+ lwzx r10, r5, r3
+ addi r3, r3, 4
+ lwzx r11, r4, r3
+ ADCSBC r9, r10, r9
+ lwzx r12, r5, r3
+ addi r3, r3, 4
+ b L(11)
+
+L(n01): bne cr7, L(n10)
+ cmpwi cr0, r0, 0 C n = 2, 6, 10, 14, ...
+ lwzx r11, r4, r3
+ lwzx r12, r5, r3
+ addi r3, r3, 4
+ lwzx r0, r4, r3
+ ADCSBC r11, r12, r11
+ lwzx r6, r5, r3
+ addi r3, r3, 4
+ ble cr0, L(end)
+
+
+L(lp): lwzx r7, r4, r3
+ ADCSBC r0, r6, r0
+ lwzx r8, r5, r3
+ stwu r11, 4(r3)
+L(01): lwzx r9, r4, r3
+ ADCSBC r7, r8, r7
+ lwzx r10, r5, r3
+ stwu r0, 4(r3)
+L(00): lwzx r11, r4, r3
+ ADCSBC r9, r10, r9
+ lwzx r12, r5, r3
+ stwu r7, 4(r3)
+L(11): lwzx r0, r4, r3
+ ADCSBC r11, r12, r11
+ lwzx r6, r5, r3
+ stwu r9, 4(r3)
+ bdnz L(lp)
+
+L(end): ADCSBC r0, r6, r0
+ stw r11, 4(r3)
+L(ret): stw r0, 8(r3)
+IFADD(` li r3, 0 ')
+IFADD(` addze r3, r3 ')
+IFSUB(` subfe r3, r0, r0')
+IFSUB(` neg r3, r3')
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/bdiv_dbm1c.asm b/gmp/mpn/powerpc32/bdiv_dbm1c.asm
new file mode 100644
index 0000000000..72b2c482e4
--- /dev/null
+++ b/gmp/mpn/powerpc32/bdiv_dbm1c.asm
@@ -0,0 +1,131 @@
+dnl PPC32 mpn_bdiv_dbm1c.
+
+dnl Copyright 2008 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: ?
+C 75x (G3): ?
+C 7400,7410 (G4): 9.43
+C 744x,745x (G4+): 6.28
+C power4/ppc970: ?
+C power5: ?
+
+C TODO
+C * Nothing to do...
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`bd', `r6')
+define(`cy', `r7')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_dbm1c)
+ lwz r0, 0(r4)
+
+ rlwinm. r12, r5, 0,30,31
+ cmplwi cr6, r12, 2
+ cmplwi cr7, r5, 4
+ addi r5, r5, 1
+ srwi r5, r5, 2
+ mtctr r5
+ beq cr0, L(b00)
+ blt cr6, L(b01)
+ beq cr6, L(b10)
+
+L(b11): mullw r5, r0, r6
+ mulhwu r12, r0, r6
+ lwz r0, 4(r4)
+ addi r4, r4, -12
+ addi r3, r3, -12
+ b L(3)
+
+L(b00): mullw r9, r0, r6
+ mulhwu r8, r0, r6
+ lwz r0, 4(r4)
+ addi r4, r4, -8
+ addi r3, r3, -8
+ b L(0)
+
+L(b01): mullw r5, r0, r6
+ mulhwu r12, r0, r6
+ addi r3, r3, -4
+ ble cr7, L(e1)
+ lwz r0, 4(r4)
+ addi r4, r4, -4
+ b L(1)
+
+L(b10): mullw r9, r0, r6
+ mulhwu r8, r0, r6
+ lwz r0, 4(r4)
+ ble cr7, L(e2)
+
+ ALIGN(16)
+L(top): mullw r5, r0, r6
+ mulhwu r12, r0, r6
+ subfc r11, r9, r7
+ lwz r0, 8(r4)
+ subfe r7, r8, r11
+ stw r11, 0(r3)
+L(1): mullw r9, r0, r6
+ mulhwu r8, r0, r6
+ subfc r11, r5, r7
+ lwz r0, 12(r4)
+ subfe r7, r12, r11
+ stw r11, 4(r3)
+L(0): mullw r5, r0, r6
+ mulhwu r12, r0, r6
+ subfc r11, r9, r7
+ lwz r0, 16(r4)
+ subfe r7, r8, r11
+ stw r11, 8(r3)
+L(3): mullw r9, r0, r6
+ mulhwu r8, r0, r6
+ subfc r11, r5, r7
+ lwz r0, 20(r4)
+ subfe r7, r12, r11
+ stw r11, 12(r3)
+ addi r4, r4, 16
+ addi r3, r3, 16
+ bdnz L(top)
+
+L(e2): mullw r5, r0, r6
+ mulhwu r12, r0, r6
+ subfc r11, r9, r7
+ subfe r7, r8, r11
+ stw r11, 0(r3)
+L(e1): subfc r11, r5, r7
+ stw r11, 4(r3)
+ subfe r3, r12, r11
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/darwin.m4 b/gmp/mpn/powerpc32/darwin.m4
new file mode 100644
index 0000000000..db4226800b
--- /dev/null
+++ b/gmp/mpn/powerpc32/darwin.m4
@@ -0,0 +1,91 @@
+divert(-1)
+dnl m4 macros for Mac OS 32-bit assembly.
+
+dnl Copyright 2005, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+define(`ASM_START',`')
+
+dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs_range(1,2)
+`ifelse(`$2',toc,,
+`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
+ .text
+ .globl $1
+ .align 3
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1))
+
+
+dnl LEA -- Load Effective Address.
+
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',
+` mflr r0 C save return address
+ bcl 20, 31, 1f
+1: mflr $1
+ addis $1, $1, ha16($2-1b)
+ la $1, lo16($2-1b)($1)
+ mtlr r0 C restore return address
+',`
+ lis $1, ha16($2)
+ la $1, lo16($2)($1)
+')')
+
+define(`LEAL',
+m4_assert_numargs(2)
+`LEA($1,$2)')
+
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+` .const
+ ALIGN(ifelse($#,1,2,$2))
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1))
+
+define(`ASM_END', `dnl')
+
+ifdef(`PIC',`
+define(`PIC_SLOW')')
+
+divert
diff --git a/gmp/mpn/powerpc32/diveby3.asm b/gmp/mpn/powerpc32/diveby3.asm
new file mode 100644
index 0000000000..288a7d30ac
--- /dev/null
+++ b/gmp/mpn/powerpc32/diveby3.asm
@@ -0,0 +1,93 @@
+dnl PowerPC-32 mpn_divexact_by3 -- mpn by 3 exact division
+
+dnl Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 5
+C 75x (G3): ?
+C 7400,7410 (G4): 8
+C 744x,745x (G4+): 6
+C power4/ppc970: 12
+C power5: ?
+
+C void mpn_divexact_by3 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C We avoid the slow subfe instruction and instead rely on an extremely unlikely
+C branch.
+C
+C The mullw has the inverse in the first operand, since 0xAA..AB won't allow
+C any early-out. The src[] data normally won't either, but there's at least
+C a chance, whereas 0xAA..AB never will. If, for instance, src[] is all
+C zeros (not a sensible input of course) we run at 7.0 c/l on ppc750.
+C
+C The mulhwu has the "3" multiplier in the second operand, which lets 750 and
+C 7400 use an early-out.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cy', `r6')
+
+ASM_START()
+PROLOGUE(mpn_divexact_by3c)
+ lwz r11, 0(up)
+ mtctr n
+ lis r12, 0xAAAA
+ ori r12, r12, 0xAAAB
+ li r10, 3
+
+ cmplw cr7, cy, r11
+ subf r11, cy, r11
+
+ mullw r0, r11, r12
+ stw r0, 0(rp)
+ bdz L(one)
+
+L(top): lwzu r9, 4(up)
+ mulhwu r7, r0, r10
+ bgt- cr7, L(adj) C very unlikely branch
+L(bko): cmplw cr7, r7, r9
+ subf r0, r7, r9
+ mullw r0, r12, r0
+ stwu r0, 4(rp)
+ bdnz L(top)
+
+L(one): mulhwu r3, r0, r10
+ blelr+ cr7
+ addi r3, r3, 1
+ blr
+
+L(adj): addi r7, r7, 1
+ b L(bko)
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/powerpc32/divrem_2.asm b/gmp/mpn/powerpc32/divrem_2.asm
new file mode 100644
index 0000000000..c6e64efe23
--- /dev/null
+++ b/gmp/mpn/powerpc32/divrem_2.asm
@@ -0,0 +1,182 @@
+dnl PPC-32 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+
+dnl Copyright 2007, 2008, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C norm frac
+C 7410 ~36.5 ~36.5
+C 744x, 745x 29 29
+
+C INPUT PARAMETERS
+C qp = r3
+C fn = r4
+C up = r5
+C un = r6
+C d = r7
+
+C TODO
+C * Decrease register usage.
+C * Make sure mul operands and optimal for early-out.
+C * Check that things work well for a shared library build.
+C * Write an invert_limb, perhaps inline, perhaps as a private call. Or at
+C least vastly improve the current __udiv_qrnnd_c based code.
+
+
+ASM_START()
+PROLOGUE(mpn_divrem_2)
+ stwu r1, -32(r1)
+ slwi r0, r6, 2
+ add r5, r5, r0
+ stmw r28, 8(r1)
+ addi r29, r5, -8 C up = up_param + un - 2
+ lwz r10, 4(r7)
+ lwz r12, 4(r29)
+ addi r8, r3, -12
+ lwz r7, 0(r7)
+ cmplw cr7, r12, r10
+ lwz r28, 0(r29)
+ blt- cr7, L(2)
+ bgt+ cr7, L(4)
+ cmplw cr7, r28, r7
+ blt- cr7, L(2)
+L(4): subfc r28, r7, r28
+ subfe r12, r10, r12
+ li r3, 1
+ b L(6)
+L(2): li r3, 0
+
+L(6): add r0, r4, r6
+ addic. r30, r0, -2
+ ble- cr0, L(ret)
+
+ slwi r9, r0, 2
+ add r8, r8, r9 C rp += un + fn
+ mtctr r30
+
+C Compute di from d1
+ srwi r11, r10, 16
+ nor r0, r10, r10
+ divwu r31, r0, r11
+ rlwinm r5, r10, 0, 16, 31
+ mullw r9, r11, r31
+ mullw r6, r5, r31
+ subf r0, r9, r0
+ slwi r0, r0, 16
+ ori r0, r0, 65535
+ cmplw cr7, r0, r6
+ bge- cr7, L(9)
+ add r0, r0, r10
+ cmplw cr7, r0, r10
+ cmplw cr6, r6, r0
+ addi r31, r31, -1 C q1--
+ crorc 28, 28, 25
+ bc+ 12, 28, L(9)
+ addi r31, r31, -1 C q1--
+ add r0, r0, r10
+L(9): subf r0, r6, r0
+ divwu r6, r0, r11
+ mullw r9, r11, r6
+ mullw r11, r5, r6
+ subf r0, r9, r0
+ slwi r0, r0, 16
+ ori r0, r0, 65535
+ cmplw cr7, r0, r11
+ bge- cr7, L(13)
+ add r0, r0, r10
+ cmplw cr7, r0, r10
+ cmplw cr6, r11, r0
+ addi r6, r6, -1 C q0--
+ crorc 28, 28, 25
+ bc+ 12, 28, L(13)
+C add r0, r0, r10 C final remainder
+ addi r6, r6, -1 C q0--
+L(13): rlwimi r6, r31, 16, 0, 15 C assemble final quotient
+
+C Adjust di by including d0
+ mullw r9, r10, r6 C t0 = LO(di * d1)
+ addc r11, r9, r7
+ subfe r0, r1, r1
+ mulhwu r9, r6, r7 C s1 = HI(di * d0)
+ addc r9, r11, r9
+ addze. r0, r0
+ blt cr0, L(17)
+L(18): subfc r9, r10, r9
+ addi r6, r6, -1
+ addme. r0, r0
+ bge+ cr0, L(18)
+L(17):
+
+C r0 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r28 r29 r30 r31
+C msl di d0 qp d1 fn up un
+L(loop):
+ mullw r0, r12, r6 C q0 = LO(n2 * di)
+ cmpw cr7, r30, r4
+ addc r31, r0, r28 C q0 += n1
+ mulhwu r9, r12, r6 C q = HI(n2 * di)
+ adde r12, r9, r12 C q += n2
+ addi r30, r30, -1
+ mullw r0, r10, r12 C d1 * q
+ li r9, 0
+ subf r0, r0, r28 C n1 -= d1 * q
+ addi r5, r12, 1
+ ble- cr7, L(23)
+ lwzu r9, -4(r29)
+L(23): mullw r11, r12, r7 C t0 = LO(d0 * q)
+ subfc r28, r7, r9 C n0 -= d0
+ subfe r0, r10, r0 C n1 -= d1
+ mulhwu r12, r12, r7 C t1 = HI(d0 * q)
+ subfc r28, r11, r28 C n0 -= t0
+ subfe r12, r12, r0 C n1 -= t1
+ cmplw cr7, r12, r31
+ blt+ cr7, L(24)
+ addc r28, r28, r7
+ adde r12, r12, r10
+ addi r5, r5, -1
+L(24): cmplw cr7, r12, r10
+ bge- cr7, L(fix)
+L(bck): stw r5, 0(r8)
+ addi r8, r8, -4
+ bdnz L(loop)
+
+L(ret): stw r28, 0(r29)
+ stw r12, 4(r29)
+ lmw r28, 8(r1)
+ addi r1, r1, 32
+ blr
+
+L(fix): cmplw cr6, r28, r7
+ bgt+ cr7, L(28)
+ blt- cr6, L(bck)
+L(28): subfc r28, r7, r28
+ subfe r12, r10, r12
+ addi r5, r5, 1
+ b L(bck)
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/eabi.m4 b/gmp/mpn/powerpc32/eabi.m4
new file mode 100644
index 0000000000..cd7633c633
--- /dev/null
+++ b/gmp/mpn/powerpc32/eabi.m4
@@ -0,0 +1,86 @@
+divert(-1)
+dnl m4 macros for powerpc32 eABI assembly.
+
+dnl Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+define(`ASM_START',`')
+
+dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+ `
+ .section ".text"
+ .align 3
+ .globl $1
+ .type $1, @function
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+` .size $1, .-$1')
+
+dnl This ought to support PIC, but it is unclear how that is done for eABI
+define(`LEA',
+m4_assert_numargs(2)
+`
+ lis $1, $2@ha
+ la $1, $2@l($1)
+')
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`
+ .section .rodata
+ ALIGN(ifelse($#,1,2,$2))
+ .type $1, @object
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1)
+` .size $1, .-$1')
+
+define(`ASM_END', `dnl')
+
+ifdef(`PIC',`
+define(`PIC_SLOW')')
+
+dnl 64-bit "long long" parameters are put in an even-odd pair, skipping an
+dnl even register if that was in turn. I wish somebody could explain why that
+dnl is a good idea.
+define(`BROKEN_LONGLONG_PARAM')
+
+divert
diff --git a/gmp/mpn/powerpc32/elf.m4 b/gmp/mpn/powerpc32/elf.m4
new file mode 100644
index 0000000000..a64a1271ff
--- /dev/null
+++ b/gmp/mpn/powerpc32/elf.m4
@@ -0,0 +1,97 @@
+divert(-1)
+dnl m4 macros for powerpc32 GNU/Linux assembly.
+
+dnl Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+define(`ASM_START',`')
+
+dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl EPILOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs_range(1,2)
+`ifelse(`$2',toc,,
+`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
+ .section ".text"
+ .align 3
+ .globl $1
+ .type $1, @function
+$1:')
+
+define(`EPILOGUE_cpu',
+m4_assert_numargs(1)
+` .size $1, .-$1')
+
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`
+ mflr r0
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr $1
+ mtlr r0
+ lwz $1, $2@got($1)
+',`
+ lis $1, $2@ha
+ la $1, $2@l($1)
+')')
+
+define(`LEAL',
+m4_assert_numargs(2)
+`LEA($1,$2)')
+
+
+define(`EXTERN',
+m4_assert_numargs(1)
+`dnl')
+
+define(`DEF_OBJECT',
+m4_assert_numargs_range(1,2)
+`
+ .section .rodata
+ ALIGN(ifelse($#,1,2,$2))
+ .type $1, @object
+$1:
+')
+
+define(`END_OBJECT',
+m4_assert_numargs(1)
+` .size $1, .-$1')
+
+define(`ASM_END', `dnl')
+
+ifdef(`PIC',`
+define(`PIC_SLOW')')
+
+dnl 64-bit "long long" parameters are put in an even-odd pair, skipping an
+dnl even register if that was in turn. I wish somebody could explain why that
+dnl is a good idea.
+define(`BROKEN_LONGLONG_PARAM')
+
+divert
diff --git a/gmp/mpn/powerpc32/gmp-mparam.h b/gmp/mpn/powerpc32/gmp-mparam.h
new file mode 100644
index 0000000000..784a6d7b74
--- /dev/null
+++ b/gmp/mpn/powerpc32/gmp-mparam.h
@@ -0,0 +1,217 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2010, 2014 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+
+/* This file is supposed to be used for 604, 604e, 744x/745x/747x (G4+), i.e.,
+ 32-bit PowerPC processors with reasonably fast integer multiply insns. The
+ values below are chosen to be best for the latter processors, since 604 is
+ largely irrelevant today.
+
+ In mpn/powerpc32/750/gmp-mparam.h there are values for 75x (G3) and for
+ 7400/7410 (G4), both which have much slower multiply instructions. */
+
+/* 1417 MHz PPC 7447A */
+/* FFT tuning limit = 12500000 */
+/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 49
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1_NORM_THRESHOLD 2
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 69
+
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 73
+#define MUL_TOOM44_THRESHOLD 106
+#define MUL_TOOM6H_THRESHOLD 156
+#define MUL_TOOM8H_THRESHOLD 236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 71
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 72
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 82
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 22
+#define SQR_TOOM3_THRESHOLD 74
+#define SQR_TOOM4_THRESHOLD 130
+#define SQR_TOOM6_THRESHOLD 189
+#define SQR_TOOM8_THRESHOLD 284
+
+#define MULMID_TOOM42_THRESHOLD 32
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 14
+
+#define MUL_FFT_MODF_THRESHOLD 284 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 284, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 19, 7}, { 39, 8}, { 23, 7}, \
+ { 47, 8}, { 27, 9}, { 15, 8}, { 39, 9}, \
+ { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95,10}, { 31, 9}, { 71, 8}, { 143, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 9}, { 135, 8}, \
+ { 271, 9}, { 143,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 175,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207, 8}, { 415,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415, 8}, { 831,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271, 9}, { 543, 8}, { 1087,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 351, 9}, { 703,11}, { 191,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447, 9}, { 895,10}, \
+ { 479, 9}, { 959,12}, { 127,11}, { 255,10}, \
+ { 543, 9}, { 1087,11}, { 287,10}, { 575,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \
+ { 1407,12}, { 191,11}, { 383,10}, { 767,11}, \
+ { 415,10}, { 831,11}, { 447,10}, { 895,11}, \
+ { 479,10}, { 959,13}, { 127,12}, { 255,11}, \
+ { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \
+ { 639,10}, { 1279,11}, { 703,10}, { 1407,12}, \
+ { 383,11}, { 831,12}, { 447,11}, { 959,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,10}, { 2431,12}, { 639,11}, { 1279,12}, \
+ { 703,11}, { 1407,13}, { 383,12}, { 895,11}, \
+ { 1791,12}, { 959,14}, { 255,13}, { 511,12}, \
+ { 1215,11}, { 2431,13}, { 639,12}, { 1471,13}, \
+ { 767,12}, { 1599,13}, { 895,12}, { 1919,14}, \
+ { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
+ { 2431,13}, { 1407,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 160
+#define MUL_FFT_THRESHOLD 3712
+
+#define SQR_FFT_MODF_THRESHOLD 248 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 248, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 19, 7}, { 39, 8}, { 27, 9}, \
+ { 15, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 71, 8}, { 143, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 7}, { 511, 9}, \
+ { 143,10}, { 79, 9}, { 159, 8}, { 319, 9}, \
+ { 175, 8}, { 351,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207, 8}, { 415, 7}, { 831,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351,11}, { 95,10}, \
+ { 191, 9}, { 383,10}, { 207, 9}, { 415, 8}, \
+ { 831,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,12}, { 127,11}, { 255,10}, \
+ { 543,11}, { 287,10}, { 607,11}, { 319,10}, \
+ { 639,11}, { 351,10}, { 703, 9}, { 1407,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,11}, { 447,10}, { 895,11}, { 479,13}, \
+ { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \
+ { 607,12}, { 319,11}, { 639,10}, { 1279,11}, \
+ { 703,10}, { 1407,12}, { 383,11}, { 831,12}, \
+ { 447,11}, { 959,13}, { 255,12}, { 511,11}, \
+ { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \
+ { 1279,12}, { 703,11}, { 1407,13}, { 383,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
+ { 639,12}, { 1471,13}, { 767,12}, { 1599,13}, \
+ { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \
+ { 2111,13}, { 1151,12}, { 2431,13}, { 1407,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 154
+#define SQR_FFT_THRESHOLD 2688
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 45
+#define MULLO_MUL_N_THRESHOLD 6633
+
+#define DC_DIV_QR_THRESHOLD 44
+#define DC_DIVAPPR_Q_THRESHOLD 142
+#define DC_BDIV_QR_THRESHOLD 54
+#define DC_BDIV_Q_THRESHOLD 124
+
+#define INV_MULMOD_BNM1_THRESHOLD 43
+#define INV_NEWTON_THRESHOLD 179
+#define INV_APPR_THRESHOLD 157
+
+#define BINV_NEWTON_THRESHOLD 214
+#define REDC_1_TO_REDC_N_THRESHOLD 55
+
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 1078
+#define MUPI_DIV_QR_THRESHOLD 84
+#define MU_BDIV_QR_THRESHOLD 872
+#define MU_BDIV_Q_THRESHOLD 1078
+
+#define POWM_SEC_TABLE 1,19,102,428,1378
+
+#define MATRIX22_STRASSEN_THRESHOLD 12
+#define HGCD_THRESHOLD 120
+#define HGCD_APPR_THRESHOLD 166
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 339
+#define GCDEXT_DC_THRESHOLD 273
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 781
+#define SET_STR_PRECOMPUTE_THRESHOLD 1505
+
+#define FAC_DSC_THRESHOLD 141
+#define FAC_ODD_THRESHOLD 29
diff --git a/gmp/mpn/powerpc32/invert_limb.asm b/gmp/mpn/powerpc32/invert_limb.asm
new file mode 100644
index 0000000000..612bfe523c
--- /dev/null
+++ b/gmp/mpn/powerpc32/invert_limb.asm
@@ -0,0 +1,142 @@
+dnl PowerPC-32 mpn_invert_limb -- Invert a normalized limb.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: ?
+C 75x (G3): ?
+C 7400,7410 (G4): ?
+C 744x,745x (G4+): 32
+C power4/ppc970: ?
+C power5: ?
+
+EXTERN(approx_tab)
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+ rlwinm r6, r3, 11, 22, 30 C extract bits 30..22 to pos 2^1
+ srwi r10, r3, 11 C extract bits 31..11
+ LEA( r9, approx_tab) C N.B. clobbers r0 for ELF and Darwin
+ lhzx r9, r9, r6 C w2
+ addi r0, r10, 1
+ mullw r11, r9, r9
+ slwi r9, r9, 4
+ mulhwu r7, r11, r0
+ rlwinm r11, r3, 0, 31, 31 C extract bit 0
+ addi r0, r9, -1
+ srwi r9, r3, 1 C d >> 1
+ subf r0, r7, r0 C w1
+ add r9, r9, r11 C d31
+ mullw r9, r0, r9 C w1 * d31
+ srwi r10, r0, 1 C w1 >> 1
+ neg r11, r11
+ and r11, r10, r11
+ subf r11, r9, r11
+ mulhwu r9, r11, r0
+ slwi r0, r0, 15
+ srwi r9, r9, 1
+ add r0, r9, r0 C w0
+ mullw r10, r0, r3
+ mulhwu r9, r0, r3
+ addc r11, r10, r3
+ adde r3, r9, r3
+ subf r3, r3, r0
+ blr
+EPILOGUE()
+
+DEF_OBJECT(approx_tab)
+ .short 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
+ .short 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
+ .short 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
+ .short 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
+ .short 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
+ .short 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
+ .short 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
+ .short 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
+ .short 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
+ .short 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
+ .short 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
+ .short 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
+ .short 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
+ .short 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
+ .short 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
+ .short 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
+ .short 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
+ .short 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
+ .short 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
+ .short 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
+ .short 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
+ .short 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
+ .short 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
+ .short 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
+ .short 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
+ .short 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
+ .short 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
+ .short 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
+ .short 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
+ .short 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
+ .short 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
+ .short 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
+ .short 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
+ .short 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
+ .short 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
+ .short 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
+ .short 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
+ .short 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
+ .short 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
+ .short 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
+ .short 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
+ .short 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
+ .short 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
+ .short 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
+ .short 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
+ .short 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
+ .short 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
+ .short 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
+ .short 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
+ .short 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
+ .short 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
+ .short 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
+ .short 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
+ .short 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
+ .short 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
+ .short 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
+ .short 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
+ .short 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
+ .short 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
+ .short 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
+ .short 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
+ .short 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
+ .short 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
+ .short 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
+END_OBJECT(approx_tab)
+ASM_END()
diff --git a/gmp/mpn/powerpc32/lshift.asm b/gmp/mpn/powerpc32/lshift.asm
new file mode 100644
index 0000000000..948f8c6cf3
--- /dev/null
+++ b/gmp/mpn/powerpc32/lshift.asm
@@ -0,0 +1,166 @@
+dnl PowerPC-32 mpn_lshift -- Shift a number left.
+
+dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ slwi r0, r5, 2
+ add r4, r4, r0 C make r4 point at end of s1
+ add r7, r3, r0 C make r7 point at end of res
+ bgt L(BIG) C branch if more than 12 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32
+ lwzu r11, -4(r4) C load first s1 limb
+ srw r3, r11, r8 C compute function return value
+ bdz L(end1)
+
+L(oop): lwzu r10, -4(r4)
+ slw r9, r11, r6
+ srw r12, r10, r8
+ or r9, r9, r12
+ stwu r9, -4(r7)
+ bdz L(end2)
+ lwzu r11, -4(r4)
+ slw r9, r10, r6
+ srw r12, r11, r8
+ or r9, r9, r12
+ stwu r9, -4(r7)
+ bdnz L(oop)
+
+L(end1):
+ slw r0, r11, r6
+ stw r0, -4(r7)
+ blr
+L(end2):
+ slw r0, r10, r6
+ stw r0, -4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwzu r9, -4(r4)
+ subfic r8, r6, 32
+ srw r3, r9, r8 C compute function return value
+ slw r0, r9, r6
+ addi r5, r5, -1
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, -4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ slw r12, r28, r6
+ srw r24, r28, r8
+ lwzu r28, -4(r4)
+ or r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): slw r12, r28, r6
+ srw r24, r28, r8
+ or r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1
+ mtctr r5
+ lwz r28, -4(r4)
+ lwz r29, -8(r4)
+ lwz r30, -12(r4)
+ lwzu r31, -16(r4)
+
+L(loopU):
+ slw r9, r28, r6
+ srw r24, r28, r8
+ lwz r28, -4(r4)
+ slw r10, r29, r6
+ srw r25, r29, r8
+ lwz r29, -8(r4)
+ slw r11, r30, r6
+ srw r26, r30, r8
+ lwz r30, -12(r4)
+ slw r12, r31, r6
+ srw r27, r31, r8
+ lwzu r31, -16(r4)
+ or r24, r0, r24
+ stw r24, -4(r7)
+ or r25, r9, r25
+ stw r25, -8(r7)
+ or r26, r10, r26
+ stw r26, -12(r7)
+ or r27, r11, r27
+ stwu r27, -16(r7)
+ mr r0, r12
+ bdnz L(loopU)
+
+ slw r9, r28, r6
+ srw r24, r28, r8
+ slw r10, r29, r6
+ srw r25, r29, r8
+ slw r11, r30, r6
+ srw r26, r30, r8
+ slw r12, r31, r6
+ srw r27, r31, r8
+ or r24, r0, r24
+ stw r24, -4(r7)
+ or r25, r9, r25
+ stw r25, -8(r7)
+ or r26, r10, r26
+ stw r26, -12(r7)
+ or r27, r11, r27
+ stw r27, -16(r7)
+
+ stw r12, -20(r7)
+ lmw r24, -32(r1) C restore registers
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/lshiftc.asm b/gmp/mpn/powerpc32/lshiftc.asm
new file mode 100644
index 0000000000..61606d1b66
--- /dev/null
+++ b/gmp/mpn/powerpc32/lshiftc.asm
@@ -0,0 +1,168 @@
+dnl PowerPC-32 mpn_lshiftc.
+
+dnl Copyright 1995, 1998, 2000, 2002-2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ slwi r0, r5, 2
+ add r4, r4, r0 C make r4 point at end of s1
+ add r7, r3, r0 C make r7 point at end of res
+ bgt L(BIG) C branch if more than 12 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32
+ lwzu r11, -4(r4) C load first s1 limb
+ srw r3, r11, r8 C compute function return value
+ bdz L(end1)
+
+L(oop): lwzu r10, -4(r4)
+ slw r9, r11, r6
+ srw r12, r10, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdz L(end2)
+ lwzu r11, -4(r4)
+ slw r9, r10, r6
+ srw r12, r11, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdnz L(oop)
+
+L(end1):
+ slw r0, r11, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+L(end2):
+ slw r0, r10, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwzu r9, -4(r4)
+ subfic r8, r6, 32
+ srw r3, r9, r8 C compute function return value
+ slw r0, r9, r6
+ addi r5, r5, -1
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, -4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ slw r12, r28, r6
+ srw r24, r28, r8
+ lwzu r28, -4(r4)
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): slw r12, r28, r6
+ srw r24, r28, r8
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1
+ mtctr r5
+ lwz r28, -4(r4)
+ lwz r29, -8(r4)
+ lwz r30, -12(r4)
+ lwzu r31, -16(r4)
+
+L(loopU):
+ slw r9, r28, r6
+ srw r24, r28, r8
+ lwz r28, -4(r4)
+ slw r10, r29, r6
+ srw r25, r29, r8
+ lwz r29, -8(r4)
+ slw r11, r30, r6
+ srw r26, r30, r8
+ lwz r30, -12(r4)
+ slw r12, r31, r6
+ srw r27, r31, r8
+ lwzu r31, -16(r4)
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stwu r27, -16(r7)
+ mr r0, r12
+ bdnz L(loopU)
+
+ slw r9, r28, r6
+ srw r24, r28, r8
+ slw r10, r29, r6
+ srw r25, r29, r8
+ slw r11, r30, r6
+ srw r26, r30, r8
+ slw r12, r31, r6
+ srw r27, r31, r8
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stw r27, -16(r7)
+ nor r12, r12, r12
+ stw r12, -20(r7)
+ lmw r24, -32(r1) C restore registers
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/mod_34lsub1.asm b/gmp/mpn/powerpc32/mod_34lsub1.asm
new file mode 100644
index 0000000000..6d7fe4d089
--- /dev/null
+++ b/gmp/mpn/powerpc32/mod_34lsub1.asm
@@ -0,0 +1,145 @@
+dnl PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
+
+dnl Copyright 2002, 2003, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 603e: ?
+C 604e: 3
+C 75x (G3): 3
+C 7400,7410 (G4): 3
+C 744x,745x (G4+): 3
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
+C
+C There seems no need to schedule the loads back, the code is still 3.0 c/l
+C on 750/7400 no matter where they're placed.
+C
+C Alternatives:
+C
+C Fetching half words would allow add instead for accumulating, instead of
+C adde and its serialization. An outer loop would be required though, since
+C 2^16 halfwords can overflow. lhz+add would be 2.0 c/l, but if there's
+C also a bdz or bdnz for each and a pointer update say every three limbs
+C then the total would be 2.67 c/l which isn't much faster than the current
+C simpler code.
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+
+ C r3 src
+ C r4 size
+
+ mtctr r4
+ addic r6, r3, 8 C &src[2], and clear CA
+
+ lwz r3, 0(r3) C acc0 = src[0]
+ bdz L(done)
+
+ lwz r4, -4(r6) C acc1 = src[1]
+ bdz L(two)
+
+ lwz r5, 0(r6) C acc2 = src[2]
+ lis r7, 0 C no carry if just three limbs
+
+ bdz L(three)
+ lis r7, 1 C 0x10000 carry pos
+
+L(top):
+ C r3 acc0
+ C r4 acc1
+ C r5 acc2
+ C r6 src, incrementing
+ C r7 carry pos
+
+ lwz r0, 4(r6)
+ adde r3, r3, r0
+ bdz L(end0)
+
+ lwz r0, 8(r6)
+ adde r4, r4, r0
+ bdz L(end1)
+
+ lwzu r0, 12(r6)
+ adde r5, r5, r0
+ bdnz L(top)
+
+
+ srwi r7, r7, 8
+L(end0):
+ srwi r7, r7, 8
+L(end1):
+ subfe r0, r0, r0 C -1 if not CA
+
+ andc r7, r7, r0 C final carry, 0x10000, 0x100, 1 or 0
+L(three):
+ rlwinm r6, r3, 0,8,31 C acc0 low
+
+ add r7, r7, r6
+ rlwinm r6, r3, 8,24,31 C acc0 high
+
+ add r7, r7, r6
+ rlwinm r6, r4, 8,8,23 C acc1 low
+
+ add r7, r7, r6
+ rlwinm r6, r4, 16,16,31 C acc1 high
+
+ add r7, r7, r6
+ rlwinm r6, r5, 16,8,15 C acc2 low
+
+ add r7, r7, r6
+ rlwinm r6, r5, 24,8,31 C acc2 high
+
+ add r3, r7, r6
+
+L(done):
+ blr
+
+L(two):
+ C r3 acc0
+ C r4 acc1
+
+ rlwinm r5, r3, 8,24,31 C acc0 high
+ rlwinm r3, r3, 0,8,31 C acc0 low
+
+ add r3, r3, r5 C acc0 high + low
+ rlwinm r5, r4, 16,16,31 C acc1 high
+
+ add r3, r3, r5 C add acc1 high
+ rlwinm r5, r4, 8,8,23 C acc1 low
+
+ add r3, r3, r5 C add acc1 low
+
+ blr
+
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/mode1o.asm b/gmp/mpn/powerpc32/mode1o.asm
new file mode 100644
index 0000000000..e8a6b5e28a
--- /dev/null
+++ b/gmp/mpn/powerpc32/mode1o.asm
@@ -0,0 +1,127 @@
+dnl PowerPC-32 mpn_modexact_1_odd -- mpn by limb exact remainder.
+
+dnl Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 603e: ?
+C 604e: 6.0
+C 75x (G3): 6.0-13.0, depending on divisor
+C 7400,7410 (G4): 6.0-13.0, depending on divisor
+C 744x,745x (G4+): 8.0-10.0, depending on divisor
+C power4/ppc970: 12.0
+C power5: 12.0
+
+
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor, mp_limb_t carry);
+C
+C For PIC, the inverse is established arithmetically since it measures about
+C 5 cycles faster than the nonsense needed to access binvert_limb_table in
+C SVR4 or Darwin style PIC. AIX might be better, since it avoids bl/mflr to
+C get at the GOT/TOC/whatever.
+C
+C Using divwu for size==1 measured about 10 cycles slower on 604e, or about
+C 3-5 cycles faster on 750. For now it doesn't seem worth bothering with.
+C
+C The loop allows an early-out on mullw for the inverse, and on mulhwu for
+C the divisor. So the fastest is for instance divisor==1 (inverse==-1), and
+C the slowest is anything giving a full 32-bits in both, such as
+C divisor==0xDEADBEEF (inverse==0x904B300F). These establish the stated
+C range above for 750 and 7400.
+
+
+ASM_START()
+
+EXTERN(binvert_limb_table)
+
+PROLOGUE(mpn_modexact_1_odd)
+ li r6, 0
+
+PROLOGUE(mpn_modexact_1c_odd)
+
+ mtctr r4 C size
+
+ifdef(`PIC_SLOW',`
+C Load from our table with PIC is so slow on Linux and Darwin that we avoid it
+ rlwinm r7, r5, 1,28,28 C (divisor << 1) & 8
+ rlwinm r8, r5, 2,28,28 C (divisor << 2) & 8
+ xor r7, r7, r8 C ((divisor << 1) ^ (divisor << 2)) & 8
+ rlwinm r4, r5, 0,28,31 C divisor low 4 bits, speedup mullw
+ xor r4, r4, r7 C inverse, 4 bits
+ mullw r7, r4, r4 C i*i
+ slwi r4, r4, 1 C 2*i
+ rlwinm r8, r5, 0,24,31 C divisor low 8 bits, speedup mullw
+ mullw r7, r7, r8 C i*i*d
+ sub r4, r4, r7 C inverse, 8 bits
+',`
+ LEA( r7, binvert_limb_table)
+ rlwinm r4, r5, 31,25,31 C (divisor/2) & 0x7F
+ lbzx r4, r4,r7 C inverse, 8 bits
+')
+
+ mullw r7, r4, r4 C i*i
+ slwi r4, r4, 1 C 2*i
+ mullw r7, r5, r7 C i*i*d [i*i is 16 bits, so second operand]
+ sub r4, r4, r7 C inverse, 16 bits
+ mullw r7, r4, r4 C i*i
+ slwi r4, r4, 1 C 2*i
+ mullw r7, r7, r5 C i*i*d
+ lwz r0, 0(r3) C src[0]
+ sub r4, r4, r7 C inverse, 32 bits
+ subfc r7, r6, r0 C l = src[0] - carry
+
+ mullw r7, r7, r4 C q = l * inverse
+ bdz L(one)
+
+ lwzu r0, 4(r3) C src[1]
+ mulhwu r6, r7, r5 C carry = high(q*divisor)
+ subfe r7, r6, r0 C l = src[1] - carry
+ bdz L(two)
+
+L(top):
+ mullw r7, r7, r4 C q = l * inverse
+ lwzu r0, 4(r3) C src[i]
+ mulhwu r6, r7, r5 C carry = high(q*divisor)
+ subfe r7, r6, r0 C l = src[i] - carry
+ bdnz L(top)
+
+L(two): mullw r7, r7, r4 C q = l * inverse
+L(one): subfe r3, r3, r3 C ca 0 or -1
+ mulhwu r6, r7, r5 C carry = high(q*divisor)
+ subf r3, r3, r6 C carry + ca
+ blr
+
+EPILOGUE(mpn_modexact_1c_odd)
+EPILOGUE(mpn_modexact_1_odd)
+ASM_END()
diff --git a/gmp/mpn/powerpc32/mul_1.asm b/gmp/mpn/powerpc32/mul_1.asm
new file mode 100644
index 0000000000..e42087cfa8
--- /dev/null
+++ b/gmp/mpn/powerpc32/mul_1.asm
@@ -0,0 +1,101 @@
+dnl PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl result in a second limb vector.
+
+dnl Copyright 1995, 1997, 2000, 2002, 2003, 2005 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 4.0
+C 75x (G3): 4.5-11
+C 7400,7410 (G4): 4.5-11
+C 744x,745x (G4+): 6.0
+C power4/ppc970: 6.0
+C power5: 5.63
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C vl r6
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ mtctr r5
+ addi r3,r3,-4 C adjust res_ptr, it's offset before it's used
+ li r12,0 C clear upper product reg
+ addic r0,r0,0 C clear cy
+C Start software pipeline
+ lwz r8,0(r4)
+ bdz L(end3)
+ lwzu r9,4(r4)
+ mullw r11,r8,r6
+ mulhwu r0,r8,r6
+ bdz L(end1)
+C Software pipelined main loop
+L(loop):
+ lwz r8,4(r4)
+ mullw r10,r9,r6
+ adde r5,r11,r12
+ mulhwu r12,r9,r6
+ stw r5,4(r3)
+ bdz L(end2)
+ lwzu r9,8(r4)
+ mullw r11,r8,r6
+ adde r7,r10,r0
+ mulhwu r0,r8,r6
+ stwu r7,8(r3)
+ bdnz L(loop)
+C Finish software pipeline
+L(end1):
+ mullw r10,r9,r6
+ adde r5,r11,r12
+ mulhwu r12,r9,r6
+ stw r5,4(r3)
+ adde r7,r10,r0
+ stwu r7,8(r3)
+ addze r3,r12
+ blr
+L(end2):
+ mullw r11,r8,r6
+ adde r7,r10,r0
+ mulhwu r0,r8,r6
+ stwu r7,8(r3)
+ adde r5,r11,r12
+ stw r5,4(r3)
+ addze r3,r0
+ blr
+L(end3):
+ mullw r11,r8,r6
+ stw r11,4(r3)
+ mulhwu r3,r8,r6
+ blr
+EPILOGUE(mpn_mul_1)
diff --git a/gmp/mpn/powerpc32/p3-p7/aors_n.asm b/gmp/mpn/powerpc32/p3-p7/aors_n.asm
new file mode 100644
index 0000000000..c44df8fa50
--- /dev/null
+++ b/gmp/mpn/powerpc32/p3-p7/aors_n.asm
@@ -0,0 +1,186 @@
+dnl PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 1.5
+C POWER4/PPC970 2
+C POWER5 2
+C POWER6 2.78
+C POWER7 2.15-2.87
+
+C This code is based on powerpc64/aors_n.asm.
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C vp r5
+C n r6
+
+ifdef(`OPERATION_add_n',`
+ define(ADDSUBC, adde)
+ define(ADDSUB, addc)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)
+ define(GENRVAL, `addi r3, r3, 1')
+ define(SETCBR, `addic r0, $1, -1')
+ define(CLRCB, `addic r0, r0, 0')
+')
+ifdef(`OPERATION_sub_n',`
+ define(ADDSUBC, subfe)
+ define(ADDSUB, subfc)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)
+ define(GENRVAL, `neg r3, r3')
+ define(SETCBR, `subfic r0, $1, 0')
+ define(CLRCB, `addic r0, r1, -1')
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+ SETCBR(r7)
+ b L(ent)
+EPILOGUE()
+
+PROLOGUE(func)
+ CLRCB
+L(ent): stw r31, -4(r1)
+ stw r30, -8(r1)
+ stw r29, -12(r1)
+ stw r28, -16(r1)
+
+ rlwinm. r0, r6, 0,30,31 C r0 = n & 3, set cr0
+ cmpwi cr6, r0, 2
+ addi r6, r6, 3 C compute count...
+ srwi r6, r6, 2 C ...for ctr
+ mtctr r6 C copy count into ctr
+ beq cr0, L(b00)
+ blt cr6, L(b01)
+ beq cr6, L(b10)
+
+L(b11): lwz r8, 0(r4) C load s1 limb
+ lwz r9, 0(r5) C load s2 limb
+ lwz r10, 4(r4) C load s1 limb
+ lwz r11, 4(r5) C load s2 limb
+ lwz r12, 8(r4) C load s1 limb
+ addi r4, r4, 12
+ lwz r0, 8(r5) C load s2 limb
+ addi r5, r5, 12
+ ADDSUBC r29, r9, r8
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ stw r29, 0(r3)
+ stw r30, 4(r3)
+ stw r31, 8(r3)
+ addi r3, r3, 12
+ bdnz L(go)
+ b L(ret)
+
+L(b01): lwz r12, 0(r4) C load s1 limb
+ addi r4, r4, 4
+ lwz r0, 0(r5) C load s2 limb
+ addi r5, r5, 4
+ ADDSUBC r31, r0, r12 C add
+ stw r31, 0(r3)
+ addi r3, r3, 4
+ bdnz L(go)
+ b L(ret)
+
+L(b10): lwz r10, 0(r4) C load s1 limb
+ lwz r11, 0(r5) C load s2 limb
+ lwz r12, 4(r4) C load s1 limb
+ addi r4, r4, 8
+ lwz r0, 4(r5) C load s2 limb
+ addi r5, r5, 8
+ ADDSUBC r30, r11, r10 C add
+ ADDSUBC r31, r0, r12 C add
+ stw r30, 0(r3)
+ stw r31, 4(r3)
+ addi r3, r3, 8
+ bdnz L(go)
+ b L(ret)
+
+L(b00): C INITCY C clear/set cy
+L(go): lwz r6, 0(r4) C load s1 limb
+ lwz r7, 0(r5) C load s2 limb
+ lwz r8, 4(r4) C load s1 limb
+ lwz r9, 4(r5) C load s2 limb
+ lwz r10, 8(r4) C load s1 limb
+ lwz r11, 8(r5) C load s2 limb
+ lwz r12, 12(r4) C load s1 limb
+ lwz r0, 12(r5) C load s2 limb
+ bdz L(end)
+
+ addi r4, r4, 16
+ addi r5, r5, 16
+
+ ALIGN(16)
+L(top): ADDSUBC r28, r7, r6
+ lwz r6, 0(r4) C load s1 limb
+ lwz r7, 0(r5) C load s2 limb
+ ADDSUBC r29, r9, r8
+ lwz r8, 4(r4) C load s1 limb
+ lwz r9, 4(r5) C load s2 limb
+ ADDSUBC r30, r11, r10
+ lwz r10, 8(r4) C load s1 limb
+ lwz r11, 8(r5) C load s2 limb
+ ADDSUBC r31, r0, r12
+ lwz r12, 12(r4) C load s1 limb
+ lwz r0, 12(r5) C load s2 limb
+ stw r28, 0(r3)
+ addi r4, r4, 16
+ stw r29, 4(r3)
+ addi r5, r5, 16
+ stw r30, 8(r3)
+ stw r31, 12(r3)
+ addi r3, r3, 16
+ bdnz L(top) C decrement ctr and loop back
+
+L(end): ADDSUBC r28, r7, r6
+ ADDSUBC r29, r9, r8
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ stw r28, 0(r3)
+ stw r29, 4(r3)
+ stw r30, 8(r3)
+ stw r31, 12(r3)
+
+L(ret): lwz r31, -4(r1)
+ lwz r30, -8(r1)
+ lwz r29, -12(r1)
+ lwz r28, -16(r1)
+
+ subfe r3, r0, r0 C -cy
+ GENRVAL
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/p3/gmp-mparam.h b/gmp/mpn/powerpc32/p3/gmp-mparam.h
new file mode 100644
index 0000000000..33826956a2
--- /dev/null
+++ b/gmp/mpn/powerpc32/p3/gmp-mparam.h
@@ -0,0 +1,155 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 450 MHz POWER3 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 38
+#define MUL_TOOM44_THRESHOLD 58
+#define MUL_TOOM6H_THRESHOLD 129
+#define MUL_TOOM8H_THRESHOLD 212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 63
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 59
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 64
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 14
+#define SQR_TOOM3_THRESHOLD 53
+#define SQR_TOOM4_THRESHOLD 76
+#define SQR_TOOM6_THRESHOLD 106
+#define SQR_TOOM8_THRESHOLD 284
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 9
+
+#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 220, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 23, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 23, 9}, { 15, 8}, { 35, 9}, \
+ { 23,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 71, 8}, { 143, 9}, \
+ { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 9}, { 143,10}, { 79, 9}, \
+ { 159, 8}, { 319, 9}, { 175, 8}, { 351,10}, \
+ { 95, 9}, { 191, 8}, { 383,10}, { 111,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \
+ { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 351, 9}, { 703, 8}, \
+ { 1407,11}, { 191,10}, { 415,11}, { 223,10}, \
+ { 447, 9}, { 895,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 82
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 176 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 176, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 16, 7}, { 9, 6}, \
+ { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 23, 9}, \
+ { 7, 8}, { 15, 7}, { 31, 8}, { 23, 9}, \
+ { 15, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 7}, { 287, 6}, \
+ { 575, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 143, 8}, { 287, 7}, { 575,10}, \
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175,10}, \
+ { 95, 9}, { 191, 8}, { 383,10}, { 111, 9}, \
+ { 223,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 351, 9}, \
+ { 703, 8}, { 1407,11}, { 191,10}, { 383,11}, \
+ { 223,10}, { 447, 9}, { 895,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 87
+#define SQR_FFT_THRESHOLD 1728
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 32
+#define DC_DIVAPPR_Q_THRESHOLD 123
+#define DC_BDIV_QR_THRESHOLD 34
+#define DC_BDIV_Q_THRESHOLD 84
+
+#define INV_MULMOD_BNM1_THRESHOLD 42
+#define INV_NEWTON_THRESHOLD 129
+#define INV_APPR_THRESHOLD 124
+
+#define BINV_NEWTON_THRESHOLD 148
+#define REDC_1_TO_REDC_N_THRESHOLD 38
+
+#define MU_DIV_QR_THRESHOLD 748
+#define MU_DIVAPPR_Q_THRESHOLD 748
+#define MUPI_DIV_QR_THRESHOLD 59
+#define MU_BDIV_QR_THRESHOLD 562
+#define MU_BDIV_Q_THRESHOLD 654
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 76
+#define GCD_DC_THRESHOLD 205
+#define GCDEXT_DC_THRESHOLD 174
+#define JACOBI_BASE_METHOD 1
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 181
+#define SET_STR_PRECOMPUTE_THRESHOLD 525
diff --git a/gmp/mpn/powerpc32/p4/gmp-mparam.h b/gmp/mpn/powerpc32/p4/gmp-mparam.h
new file mode 100644
index 0000000000..20830a0bd7
--- /dev/null
+++ b/gmp/mpn/powerpc32/p4/gmp-mparam.h
@@ -0,0 +1,204 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2011, 2014 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* 1800 MHz PowerPC-970 */
+/* FFT tuning limit = 10000000 */
+/* Generated by tuneup.c, 2014-03-12, gcc 4.0 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 42
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 14
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 45
+
+#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM33_THRESHOLD 73
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 222
+#define MUL_TOOM8H_THRESHOLD 333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 107
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 92
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 100
+
+#define SQR_BASECASE_THRESHOLD 5
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 85
+#define SQR_TOOM4_THRESHOLD 160
+#define SQR_TOOM6_THRESHOLD 197
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMID_TOOM42_THRESHOLD 32
+
+#define MULMOD_BNM1_THRESHOLD 15
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define MUL_FFT_MODF_THRESHOLD 444 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 444, 5}, { 17, 6}, { 9, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
+ { 24, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
+ { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 167,10}, { 95, 9}, { 191, 8}, { 383,10}, \
+ { 111,11}, { 63,10}, { 127, 9}, { 255, 8}, \
+ { 511,10}, { 143, 9}, { 287, 8}, { 575, 9}, \
+ { 303,10}, { 159, 9}, { 319,11}, { 95,10}, \
+ { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
+ { 1087,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,10}, { 335, 9}, { 671, 8}, { 1343,10}, \
+ { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447,12}, { 127,11}, { 255,10}, { 543, 9}, \
+ { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \
+ { 319,10}, { 671, 9}, { 1343,11}, { 351,10}, \
+ { 703, 9}, { 1407,12}, { 191,11}, { 383,10}, \
+ { 767,11}, { 415,10}, { 831,11}, { 447,13}, \
+ { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \
+ { 607,10}, { 1215,12}, { 319,11}, { 671,10}, \
+ { 1343,11}, { 703,10}, { 1407,11}, { 735,12}, \
+ { 383,11}, { 767,10}, { 1535,11}, { 831,12}, \
+ { 447,10}, { 1791,11}, { 959,13}, { 255,12}, \
+ { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \
+ { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \
+ { 1407,13}, { 383,12}, { 767,11}, { 1535,12}, \
+ { 831,11}, { 1727,10}, { 3455,11}, { 1791,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1215,11}, \
+ { 2431,13}, { 639,12}, { 1471,13}, { 767,12}, \
+ { 1727,11}, { 3455,12}, { 1791,14}, { 511,13}, \
+ { 1151,12}, { 2431,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 157
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 28, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 135,10}, { 79, 9}, { 159, 8}, \
+ { 319,10}, { 95, 9}, { 191, 8}, { 383, 9}, \
+ { 207,11}, { 63,10}, { 127, 9}, { 255, 8}, \
+ { 511, 9}, { 271,10}, { 143, 9}, { 287, 8}, \
+ { 575, 9}, { 303, 8}, { 607,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
+ { 1087,10}, { 287, 9}, { 575,10}, { 303, 9}, \
+ { 607,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,12}, { 127,11}, \
+ { 255,10}, { 543, 9}, { 1087,11}, { 287,10}, \
+ { 607, 9}, { 1215,11}, { 319,10}, { 671,11}, \
+ { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
+ { 767,11}, { 415,10}, { 831,11}, { 479,13}, \
+ { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \
+ { 607,10}, { 1215,12}, { 319,11}, { 671,10}, \
+ { 1343,11}, { 703,10}, { 1407,11}, { 735,12}, \
+ { 383,11}, { 831,12}, { 447,11}, { 959,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \
+ { 1407,13}, { 383,12}, { 831,11}, { 1727,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
+ { 639,12}, { 1471,13}, { 767,12}, { 1727,13}, \
+ { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \
+ { 2111,13}, { 1151,12}, { 2431,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 150
+#define SQR_FFT_THRESHOLD 4736
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 55
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 50
+#define DC_DIVAPPR_Q_THRESHOLD 196
+#define DC_BDIV_QR_THRESHOLD 51
+#define DC_BDIV_Q_THRESHOLD 166
+
+#define INV_MULMOD_BNM1_THRESHOLD 50
+#define INV_NEWTON_THRESHOLD 226
+#define INV_APPR_THRESHOLD 202
+
+#define BINV_NEWTON_THRESHOLD 228
+#define REDC_1_TO_REDC_N_THRESHOLD 67
+
+#define MU_DIV_QR_THRESHOLD 1187
+#define MU_DIVAPPR_Q_THRESHOLD 1308
+#define MUPI_DIV_QR_THRESHOLD 114
+#define MU_BDIV_QR_THRESHOLD 998
+#define MU_BDIV_Q_THRESHOLD 1142
+
+#define POWM_SEC_TABLE 3,28,78,480,1099
+
+#define MATRIX22_STRASSEN_THRESHOLD 9
+#define HGCD_THRESHOLD 93
+#define HGCD_APPR_THRESHOLD 109
+#define HGCD_REDUCE_THRESHOLD 2479
+#define GCD_DC_THRESHOLD 379
+#define GCDEXT_DC_THRESHOLD 273
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 381
+#define SET_STR_PRECOMPUTE_THRESHOLD 1002
+
+#define FAC_DSC_THRESHOLD 179
+#define FAC_ODD_THRESHOLD 28
diff --git a/gmp/mpn/powerpc32/p5/gmp-mparam.h b/gmp/mpn/powerpc32/p5/gmp-mparam.h
new file mode 100644
index 0000000000..faa1e81da4
--- /dev/null
+++ b/gmp/mpn/powerpc32/p5/gmp-mparam.h
@@ -0,0 +1,156 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1650 MHz POWER5 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 50
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 61
+
+#define MUL_TOOM22_THRESHOLD 22
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 189
+#define MUL_TOOM8H_THRESHOLD 309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 83
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
+
+#define SQR_BASECASE_THRESHOLD 6
+#define SQR_TOOM2_THRESHOLD 40
+#define SQR_TOOM3_THRESHOLD 77
+#define SQR_TOOM4_THRESHOLD 124
+#define SQR_TOOM6_THRESHOLD 140
+#define SQR_TOOM8_THRESHOLD 238
+
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 15
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define POWM_SEC_TABLE 4,29,252,840,2080
+
+#define MUL_FFT_MODF_THRESHOLD 412 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 412, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 55,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \
+ { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
+ { 159,10}, { 335, 9}, { 671,10}, { 351, 9}, \
+ { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 415, 9}, { 831,11}, { 223,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 71
+#define MUL_FFT_THRESHOLD 4736
+
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 27, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 67, 9}, { 47,10}, { 31, 9}, \
+ { 71,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \
+ { 143, 9}, { 287, 8}, { 575, 9}, { 303,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,10}, { 351,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415,11}, { 223,10}, { 447,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 76
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 68
+#define MULLO_MUL_N_THRESHOLD 9236
+
+#define DC_DIV_QR_THRESHOLD 69
+#define DC_DIVAPPR_Q_THRESHOLD 220
+#define DC_BDIV_QR_THRESHOLD 75
+#define DC_BDIV_Q_THRESHOLD 188
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 230
+#define INV_APPR_THRESHOLD 230
+
+#define BINV_NEWTON_THRESHOLD 278
+#define REDC_1_TO_REDC_N_THRESHOLD 87
+
+#define MU_DIV_QR_THRESHOLD 1210
+#define MU_DIVAPPR_Q_THRESHOLD 1308
+#define MUPI_DIV_QR_THRESHOLD 106
+#define MU_BDIV_QR_THRESHOLD 1017
+#define MU_BDIV_Q_THRESHOLD 1210
+
+#define MATRIX22_STRASSEN_THRESHOLD 14
+#define HGCD_THRESHOLD 110
+#define HGCD_APPR_THRESHOLD 138
+#define HGCD_REDUCE_THRESHOLD 2578
+#define GCD_DC_THRESHOLD 408
+#define GCDEXT_DC_THRESHOLD 298
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 527
+#define SET_STR_PRECOMPUTE_THRESHOLD 1090
diff --git a/gmp/mpn/powerpc32/p6/gmp-mparam.h b/gmp/mpn/powerpc32/p6/gmp-mparam.h
new file mode 100644
index 0000000000..c9504b63b3
--- /dev/null
+++ b/gmp/mpn/powerpc32/p6/gmp-mparam.h
@@ -0,0 +1,165 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 3500 MHz POWER6 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 19
+#define MUL_TOOM33_THRESHOLD 55
+#define MUL_TOOM44_THRESHOLD 88
+#define MUL_TOOM6H_THRESHOLD 137
+#define MUL_TOOM8H_THRESHOLD 181
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 57
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 56
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 57
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 56
+#define SQR_TOOM4_THRESHOLD 130
+#define SQR_TOOM6_THRESHOLD 189
+#define SQR_TOOM8_THRESHOLD 296
+
+#define MULMID_TOOM42_THRESHOLD 26
+
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 12
+
+#define POWM_SEC_TABLE 2,26,127,453,1068
+
+#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 212, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 16, 7}, { 9, 6}, \
+ { 19, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 31, 8}, { 19, 7}, { 39, 8}, { 23, 9}, \
+ { 15, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 7}, { 287, 9}, \
+ { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 7}, { 511, 9}, { 143, 8}, \
+ { 287,10}, { 79, 9}, { 159, 8}, { 319, 9}, \
+ { 175, 8}, { 351,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207,10}, { 111,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \
+ { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 89
+#define MUL_FFT_THRESHOLD 1728
+
+#define SQR_FFT_MODF_THRESHOLD 184 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 184, 5}, { 6, 4}, { 13, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 23, 9}, { 7, 8}, { 23, 9}, \
+ { 15, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 63, 8}, { 127, 7}, \
+ { 255, 9}, { 71, 8}, { 143, 7}, { 287, 6}, \
+ { 575, 9}, { 79,10}, { 47,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 9}, { 143, 8}, \
+ { 287, 7}, { 575,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 175, 8}, { 351,10}, { 95, 9}, \
+ { 191, 8}, { 383, 9}, { 207,10}, { 111, 9}, \
+ { 223,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351,11}, { 95,10}, \
+ { 191, 9}, { 383,10}, { 207, 9}, { 415,10}, \
+ { 223,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 351, 9}, { 703, 8}, { 1407,11}, { 191,10}, \
+ { 415,11}, { 223,10}, { 447, 9}, { 895,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 92
+#define SQR_FFT_THRESHOLD 1600
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 57
+#define MULLO_MUL_N_THRESHOLD 3176
+
+#define DC_DIV_QR_THRESHOLD 52
+#define DC_DIVAPPR_Q_THRESHOLD 187
+#define DC_BDIV_QR_THRESHOLD 64
+#define DC_BDIV_Q_THRESHOLD 146
+
+#define INV_MULMOD_BNM1_THRESHOLD 68
+#define INV_NEWTON_THRESHOLD 182
+#define INV_APPR_THRESHOLD 182
+
+#define BINV_NEWTON_THRESHOLD 186
+#define REDC_1_TO_REDC_N_THRESHOLD 60
+
+#define MU_DIV_QR_THRESHOLD 924
+#define MU_DIVAPPR_Q_THRESHOLD 807
+#define MUPI_DIV_QR_THRESHOLD 73
+#define MU_BDIV_QR_THRESHOLD 667
+#define MU_BDIV_Q_THRESHOLD 823
+
+#define MATRIX22_STRASSEN_THRESHOLD 8
+#define HGCD_THRESHOLD 61
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 974
+#define GCD_DC_THRESHOLD 195
+#define GCDEXT_DC_THRESHOLD 134
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 9
+#define GET_STR_PRECOMPUTE_THRESHOLD 21
+#define SET_STR_DC_THRESHOLD 190
+#define SET_STR_PRECOMPUTE_THRESHOLD 411
diff --git a/gmp/mpn/powerpc32/p7/gmp-mparam.h b/gmp/mpn/powerpc32/p7/gmp-mparam.h
new file mode 100644
index 0000000000..35bb61dca2
--- /dev/null
+++ b/gmp/mpn/powerpc32/p7/gmp-mparam.h
@@ -0,0 +1,159 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 3550 MHz POWER7/T4 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 34
+
+#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM33_THRESHOLD 89
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 286
+#define MUL_TOOM8H_THRESHOLD 363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 121
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 50
+#define SQR_TOOM3_THRESHOLD 89
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 381
+
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 18
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define POWM_SEC_TABLE 4,35,225,780,2212
+
+#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 476, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 14, 5}, { 29, 6}, { 21, 7}, { 11, 6}, \
+ { 25, 7}, { 13, 6}, { 29, 7}, { 15, 6}, \
+ { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
+ { 159,11}, { 95,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
+ { 1087,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671, 8}, { 1343,10}, { 351,11}, \
+ { 191,10}, { 415, 9}, { 831,10}, { 431,11}, \
+ { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 77
+#define MUL_FFT_THRESHOLD 5312
+
+#define SQR_FFT_MODF_THRESHOLD 344 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 344, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 79,10}, { 47,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \
+ { 143, 9}, { 287, 8}, { 575, 9}, { 303,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543, 8}, { 1087,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 79
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 34
+#define MULLO_MUL_N_THRESHOLD 10323
+
+#define DC_DIV_QR_THRESHOLD 52
+#define DC_DIVAPPR_Q_THRESHOLD 202
+#define DC_BDIV_QR_THRESHOLD 68
+#define DC_BDIV_Q_THRESHOLD 152
+
+#define INV_MULMOD_BNM1_THRESHOLD 66
+#define INV_NEWTON_THRESHOLD 226
+#define INV_APPR_THRESHOLD 189
+
+#define BINV_NEWTON_THRESHOLD 292
+#define REDC_1_TO_REDC_N_THRESHOLD 79
+
+#define MU_DIV_QR_THRESHOLD 1442
+#define MU_DIVAPPR_Q_THRESHOLD 1442
+#define MUPI_DIV_QR_THRESHOLD 91
+#define MU_BDIV_QR_THRESHOLD 1308
+#define MU_BDIV_Q_THRESHOLD 1442
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 126
+#define HGCD_APPR_THRESHOLD 139
+#define HGCD_REDUCE_THRESHOLD 2681
+#define GCD_DC_THRESHOLD 573
+#define GCDEXT_DC_THRESHOLD 448
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 9
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_DC_THRESHOLD 834
+#define SET_STR_PRECOMPUTE_THRESHOLD 1888
diff --git a/gmp/mpn/powerpc32/powerpc-defs.m4 b/gmp/mpn/powerpc32/powerpc-defs.m4
new file mode 100644
index 0000000000..0c142a2e0c
--- /dev/null
+++ b/gmp/mpn/powerpc32/powerpc-defs.m4
@@ -0,0 +1,104 @@
+divert(-1)
+
+dnl m4 macros for PowerPC assembler (32 and 64 bit).
+
+dnl Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl This is the same as the default in mpn/asm-defs.m4, but with ALIGN(4)
+dnl not 8.
+dnl
+dnl 4-byte alignment is normally enough, certainly it's what gcc gives. We
+dnl don't want bigger alignment within PROLOGUE since it can introduce
+dnl padding into multiple-entrypoint routines, and with gas such padding is
+dnl zero words, which are not valid instructions.
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+` TEXT
+ ALIGN(4)
+ GLOBL `$1' GLOBL_ATTR
+ TYPE(`$1',`function')
+`$1'LABEL_SUFFIX')
+
+
+dnl Usage: r0 ... r31, cr0 ... cr7
+dnl
+dnl Registers names, either left as "r0" etc or mapped to plain 0 etc,
+dnl according to the result of the GMP_ASM_POWERPC_REGISTERS configure
+dnl test.
+
+ifelse(WANT_R_REGISTERS,no,`
+forloop(i,0,31,`deflit(`r'i,i)')
+forloop(i,0,31,`deflit(`v'i,i)')
+forloop(i,0,31,`deflit(`f'i,i)')
+forloop(i,0,7, `deflit(`cr'i,i)')
+')
+
+
+dnl Usage: ASSERT(cond,instructions)
+dnl
+dnl If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl flags condition to then be satisfied. For example,
+dnl
+dnl ASSERT(eq, `cmpwi r6, 123')
+dnl
+dnl The instructions can be omitted to just assert a flags condition with
+dnl no extra calculation. For example,
+dnl
+dnl ASSERT(ne)
+dnl
+dnl The condition can be omitted to just output the given instructions when
+dnl assertion checking is wanted. For example,
+dnl
+dnl ASSERT(, `mr r11, r0')
+dnl
+dnl Using a zero word for an illegal instruction is probably not ideal,
+dnl since it marks the beginning of a traceback table in the 64-bit ABI.
+dnl But assertions are only for development, so it doesn't matter too much.
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+m4_assert_defined(`WANT_ASSERT')
+`ifelse(WANT_ASSERT,1,
+ `C ASSERT
+ $2
+ifelse(`$1',,,
+` b$1 L(ASSERT_ok`'ASSERT_counter)
+ W32 0 C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+define(`ASSERT_counter',incr(ASSERT_counter))
+')')')
+
+define(ASSERT_counter,1)
+
+
+divert
diff --git a/gmp/mpn/powerpc32/rshift.asm b/gmp/mpn/powerpc32/rshift.asm
new file mode 100644
index 0000000000..cb0046d5ee
--- /dev/null
+++ b/gmp/mpn/powerpc32/rshift.asm
@@ -0,0 +1,164 @@
+dnl PowerPC-32 mpn_rshift -- Shift a number right.
+
+dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ addi r7, r3, -4 C dst-4
+ bgt L(BIG) C branch if more than 12 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32
+ lwz r11, 0(r4) C load first s1 limb
+ slw r3, r11, r8 C compute function return value
+ bdz L(end1)
+
+L(oop): lwzu r10, 4(r4)
+ srw r9, r11, r6
+ slw r12, r10, r8
+ or r9, r9, r12
+ stwu r9, 4(r7)
+ bdz L(end2)
+ lwzu r11, 4(r4)
+ srw r9, r10, r6
+ slw r12, r11, r8
+ or r9, r9, r12
+ stwu r9, 4(r7)
+ bdnz L(oop)
+
+L(end1):
+ srw r0, r11, r6
+ stw r0, 4(r7)
+ blr
+L(end2):
+ srw r0, r10, r6
+ stw r0, 4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwz r9, 0(r4)
+ subfic r8, r6, 32
+ slw r3, r9, r8 C compute function return value
+ srw r0, r9, r6
+ addi r5, r5, -1
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, 4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ srw r12, r28, r6
+ slw r24, r28, r8
+ lwzu r28, 4(r4)
+ or r24, r0, r24
+ stwu r24, 4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): srw r12, r28, r6
+ slw r24, r28, r8
+ or r24, r0, r24
+ stwu r24, 4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1
+ mtctr r5
+ lwz r28, 4(r4)
+ lwz r29, 8(r4)
+ lwz r30, 12(r4)
+ lwzu r31, 16(r4)
+
+L(loopU):
+ srw r9, r28, r6
+ slw r24, r28, r8
+ lwz r28, 4(r4)
+ srw r10, r29, r6
+ slw r25, r29, r8
+ lwz r29, 8(r4)
+ srw r11, r30, r6
+ slw r26, r30, r8
+ lwz r30, 12(r4)
+ srw r12, r31, r6
+ slw r27, r31, r8
+ lwzu r31, 16(r4)
+ or r24, r0, r24
+ stw r24, 4(r7)
+ or r25, r9, r25
+ stw r25, 8(r7)
+ or r26, r10, r26
+ stw r26, 12(r7)
+ or r27, r11, r27
+ stwu r27, 16(r7)
+ mr r0, r12
+ bdnz L(loopU)
+
+ srw r9, r28, r6
+ slw r24, r28, r8
+ srw r10, r29, r6
+ slw r25, r29, r8
+ srw r11, r30, r6
+ slw r26, r30, r8
+ srw r12, r31, r6
+ slw r27, r31, r8
+ or r24, r0, r24
+ stw r24, 4(r7)
+ or r25, r9, r25
+ stw r25, 8(r7)
+ or r26, r10, r26
+ stw r26, 12(r7)
+ or r27, r11, r27
+ stw r27, 16(r7)
+
+ stw r12, 20(r7)
+ lmw r24, -32(r1) C restore registers
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/sec_tabselect.asm b/gmp/mpn/powerpc32/sec_tabselect.asm
new file mode 100644
index 0000000000..a3f24d5678
--- /dev/null
+++ b/gmp/mpn/powerpc32/sec_tabselect.asm
@@ -0,0 +1,141 @@
+dnl PowerPC-32 mpn_sec_tabselect.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2011-2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: ?
+C 75x (G3): ?
+C 7400,7410 (G4): 2.5
+C 744x,745x (G4+): 2.0
+C power4/ppc970: 2.0
+C power5: ?
+
+define(`rp', `r3')
+define(`tp', `r4')
+define(`n', `r5')
+define(`nents', `r6')
+define(`which', `r7')
+
+define(`i', `r8')
+define(`j', `r9')
+define(`stride', `r12')
+define(`mask', `r11')
+
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+ addic. j, n, -4 C outer loop induction variable
+ stmw r27, -32(r1)
+ slwi stride, n, 2
+
+ blt cr0, L(outer_end)
+L(outer_top):
+ mtctr nents
+ mr r10, tp
+ li r28, 0
+ li r29, 0
+ li r30, 0
+ li r31, 0
+ addic. j, j, -4 C outer loop induction variable
+ mr i, which
+
+ ALIGN(16)
+L(top): addic i, i, -1 C set carry iff i != 0
+ subfe mask, mask, mask
+ lwz r0, 0(tp)
+ lwz r27, 4(tp)
+ and r0, r0, mask
+ and r27, r27, mask
+ or r28, r28, r0
+ or r29, r29, r27
+ lwz r0, 8(tp)
+ lwz r27, 12(tp)
+ and r0, r0, mask
+ and r27, r27, mask
+ or r30, r30, r0
+ or r31, r31, r27
+ add tp, tp, stride
+ bdnz L(top)
+
+ stw r28, 0(rp)
+ stw r29, 4(rp)
+ stw r30, 8(rp)
+ stw r31, 12(rp)
+ addi tp, r10, 16
+ addi rp, rp, 16
+ bge cr0, L(outer_top)
+L(outer_end):
+
+ andi. r0, n, 2
+ beq cr0, L(b0x)
+L(b1x): mtctr nents
+ mr r10, tp
+ li r28, 0
+ li r29, 0
+ mr i, which
+ ALIGN(16)
+L(tp2): addic i, i, -1
+ subfe mask, mask, mask
+ lwz r0, 0(tp)
+ lwz r27, 4(tp)
+ and r0, r0, mask
+ and r27, r27, mask
+ or r28, r28, r0
+ or r29, r29, r27
+ add tp, tp, stride
+ bdnz L(tp2)
+ stw r28, 0(rp)
+ stw r29, 4(rp)
+ addi tp, r10, 8
+ addi rp, rp, 8
+
+L(b0x): andi. r0, n, 1
+ beq cr0, L(b00)
+L(b01): mtctr nents
+ mr r10, tp
+ li r28, 0
+ mr i, which
+ ALIGN(16)
+L(tp1): addic i, i, -1
+ subfe mask, mask, mask
+ lwz r0, 0(tp)
+ and r0, r0, mask
+ or r28, r28, r0
+ add tp, tp, stride
+ bdnz L(tp1)
+ stw r28, 0(rp)
+
+L(b00): lmw r27, -32(r1)
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/sqr_diag_addlsh1.asm b/gmp/mpn/powerpc32/sqr_diag_addlsh1.asm
new file mode 100644
index 0000000000..f7aba33ee5
--- /dev/null
+++ b/gmp/mpn/powerpc32/sqr_diag_addlsh1.asm
@@ -0,0 +1,80 @@
+dnl PowerPC-32 mpn_sqr_diag_addlsh1.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e ?
+C 604e ?
+C 75x (G3) ?
+C 7400,7410 (G4) ?
+C 744x,745x (G4+) 6
+C power4/ppc970 ?
+C power5 ?
+
+C This has been feebly optimised for 7447 but not for any other CPU.
+
+define(`rp', r3)
+define(`tp', r4)
+define(`up', r5)
+define(`n', r6)
+
+ASM_START()
+PROLOGUE(mpn_sqr_diag_addlsh1)
+ addi n, n, -1
+ addi tp, tp, -4
+ mtctr n
+ lwz r0, 0(up)
+ li r10, 0
+ mullw r7, r0, r0
+ stw r7, 0(rp)
+ mulhwu r6, r0, r0
+ addic r31, r31, 0 C clear CF
+
+ ALIGN(16)
+L(top): lwzu r0, 4(up)
+ mullw r7, r0, r0
+ lwz r8, 4(tp)
+ lwzu r9, 8(tp)
+ rlwimi r10, r8, 1,0,30
+ srwi r11, r8, 31
+ rlwimi r11, r9, 1,0,30
+ adde r10, r10, r6
+ adde r11, r11, r7
+ stw r10, 4(rp)
+ srwi r10, r9, 31
+ mulhwu r6, r0, r0
+ stwu r11, 8(rp)
+ bdnz L(top)
+
+ adde r10, r10, r6
+ stw r10, 4(rp)
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/sublsh1_n.asm b/gmp/mpn/powerpc32/sublsh1_n.asm
new file mode 100644
index 0000000000..6dc6460016
--- /dev/null
+++ b/gmp/mpn/powerpc32/sublsh1_n.asm
@@ -0,0 +1,101 @@
+dnl PowerPC-32 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 4.0
+C 75x (G3): 5.0
+C 7400,7410 (G4): 5.0
+C 744x,745x (G4+): 5.0
+C power4/ppc970: 4.25
+C power5: 5.0
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C vp r5
+C n r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+PROLOGUE(mpn_sublsh1_n)
+ mtctr r6 C copy n in ctr
+
+ lwz v0, 0(vp) C load v limb
+ lwz u0, 0(up) C load u limb
+ addic up, up, -4 C update up; set cy
+ addi rp, rp, -4 C update rp
+ slwi s1, v0, 1
+ bdz L(end) C If done, skip loop
+
+L(loop):
+ lwz v1, 4(vp) C load v limb
+ subfe s1, s1, u0 C add limbs with cy, set cy
+ srwi s0, v0, 31 C shift down previous v limb
+ stw s1, 4(rp) C store result limb
+ lwzu u0, 8(up) C load u limb and update up
+ rlwimi s0, v1, 1, 0,30 C left shift v limb and merge with prev v limb
+
+ bdz L(exit) C decrement ctr and exit if done
+
+ lwzu v0, 8(vp) C load v limb and update vp
+ subfe s0, s0, u0 C add limbs with cy, set cy
+ srwi s1, v1, 31 C shift down previous v limb
+ stwu s0, 8(rp) C store result limb and update rp
+ lwz u0, 4(up) C load u limb
+ rlwimi s1, v0, 1, 0,30 C left shift v limb and merge with prev v limb
+
+ bdnz L(loop) C decrement ctr and loop back
+
+L(end): subfe r7, s1, u0
+ srwi r4, v0, 31
+ stw r7, 4(rp) C store last result limb
+ subfze r3, r4
+ neg r3, r3
+ blr
+L(exit):
+ subfe r7, s0, u0
+ srwi r4, v1, 31
+ stw r7, 8(rp) C store last result limb
+ subfze r3, r4
+ neg r3, r3
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/submul_1.asm b/gmp/mpn/powerpc32/submul_1.asm
new file mode 100644
index 0000000000..9fcdaa291b
--- /dev/null
+++ b/gmp/mpn/powerpc32/submul_1.asm
@@ -0,0 +1,147 @@
+dnl PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright 1995, 1997, 1998, 2000, 2002, 2005 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 7.5
+C 75x (G3): 9.3-15
+C 7400,7410 (G4): 9.3-15
+C 744x,745x (G4+): 10.5
+C power4/ppc970: 6.75
+C power5: 6.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C vl r6
+
+C This is optimized for the PPC604. See addmul_1.asm for additional comments.
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ cmpwi cr0,r5,9 C more than 9 limbs?
+ bgt cr0,L(big) C branch if more than 9 limbs
+
+ mtctr r5
+ lwz r0,0(r4)
+ mullw r7,r0,r6
+ mulhwu r10,r0,r6
+ lwz r9,0(r3)
+ subfc r8,r7,r9
+ addc r7,r7,r8 C invert cy (r7 is junk)
+ addi r3,r3,-4
+ bdz L(end)
+L(loop):
+ lwzu r0,4(r4)
+ stwu r8,4(r3)
+ mullw r8,r0,r6
+ adde r7,r8,r10
+ mulhwu r10,r0,r6
+ lwz r9,4(r3)
+ addze r10,r10
+ subfc r8,r7,r9
+ addc r7,r7,r8 C invert cy (r7 is junk)
+ bdnz L(loop)
+L(end): stw r8,4(r3)
+ addze r3,r10
+ blr
+
+L(big): stmw r30,-32(r1)
+ addi r5,r5,-1
+ srwi r0,r5,2
+ mtctr r0
+
+ lwz r7,0(r4)
+ mullw r8,r7,r6
+ mulhwu r0,r7,r6
+ lwz r7,0(r3)
+ subfc r7,r8,r7
+ addc r8,r8,r7
+ stw r7,0(r3)
+
+L(loopU):
+ lwz r7,4(r4)
+ lwz r12,8(r4)
+ lwz r30,12(r4)
+ lwzu r31,16(r4)
+ mullw r8,r7,r6
+ mullw r9,r12,r6
+ mullw r10,r30,r6
+ mullw r11,r31,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ adde r9,r9,r0
+ mulhwu r0,r12,r6
+ lwz r12,8(r3)
+ adde r10,r10,r0
+ mulhwu r0,r30,r6
+ lwz r30,12(r3)
+ adde r11,r11,r0
+ mulhwu r0,r31,r6
+ lwz r31,16(r3)
+ addze r0,r0 C new cy_limb
+ subfc r7,r8,r7
+ stw r7,4(r3)
+ subfe r12,r9,r12
+ stw r12,8(r3)
+ subfe r30,r10,r30
+ stw r30,12(r3)
+ subfe r31,r11,r31
+ stwu r31,16(r3)
+ subfe r11,r11,r11 C invert ...
+ addic r11,r11,1 C ... carry
+ bdnz L(loopU)
+
+ andi. r31,r5,3
+ mtctr r31
+ beq cr0,L(endx)
+
+L(loopE):
+ lwzu r7,4(r4)
+ mullw r8,r7,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ addze r0,r0 C new cy_limb
+ subfc r7,r8,r7
+ addc r8,r8,r7
+ stwu r7,4(r3)
+ bdnz L(loopE)
+L(endx):
+ addze r3,r0
+ lmw r30,-32(r1)
+ blr
+EPILOGUE(mpn_submul_1)
diff --git a/gmp/mpn/powerpc32/umul.asm b/gmp/mpn/powerpc32/umul.asm
new file mode 100644
index 0000000000..a5811e1651
--- /dev/null
+++ b/gmp/mpn/powerpc32/umul.asm
@@ -0,0 +1,50 @@
+dnl PowerPC-32 umul_ppmm -- support for longlong.h
+
+dnl Copyright 2000, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+
+ C r3 lowptr
+ C r4 m1
+ C r5 m2
+
+ mullw r0, r4, r5
+ mulhwu r9, r4, r5
+ stw r0, 0(r3)
+ mr r3, r9
+ blr
+
+EPILOGUE(mpn_umul_ppmm)
diff --git a/gmp/mpn/powerpc32/vmx/copyd.asm b/gmp/mpn/powerpc32/vmx/copyd.asm
new file mode 100644
index 0000000000..6aac6b8389
--- /dev/null
+++ b/gmp/mpn/powerpc32/vmx/copyd.asm
@@ -0,0 +1,203 @@
+dnl PowerPC-32/VMX and PowerPC-64/VMX mpn_copyd.
+
+dnl Copyright 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C 16-byte coaligned unaligned
+C cycles/limb cycles/limb
+C 7400,7410 (G4): 0.5 0.64
+C 744x,745x (G4+): 0.75 0.82
+C 970 (G5): 0.78 1.02 (64-bit limbs)
+
+C STATUS
+C * Works for all sizes and alignments.
+
+C TODO
+C * Optimize unaligned case. Some basic tests with 2-way and 4-way unrolling
+C indicate that we can reach 0.56 c/l for 7400, 0.75 c/l for 745x, and 0.80
+C c/l for 970.
+C * Consider using VMX instructions also for head and tail, by using some
+C read-modify-write tricks.
+C * The VMX code is used from the smallest sizes it handles, but measurements
+C show a large speed bump at the cutoff points. Small copying (perhaps
+C using some read-modify-write technique) should be optimized.
+C * Make a mpn_com based on this code.
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR', eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+
+ifelse(GMP_LIMB_BITS,32,`
+ define(`LIMB32',` $1')
+ define(`LIMB64',`')
+',`
+ define(`LIMB32',`')
+ define(`LIMB64',` $1')
+')
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+
+define(`us', `v4')
+
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+
+LIMB32(`slwi. r0, n, 2 ')
+LIMB64(`sldi. r0, n, 3 ')
+ add rp, rp, r0
+ add up, up, r0
+
+LIMB32(`cmpi cr7, n, 11 ')
+LIMB64(`cmpdi cr7, n, 5 ')
+ bge cr7, L(big)
+
+ beqlr cr0
+
+C Handle small cases with plain operations
+ mtctr n
+L(topS):
+LIMB32(`lwz r0, -4(up) ')
+LIMB64(`ld r0, -8(up) ')
+ addi up, up, -GMP_LIMB_BYTES
+LIMB32(`stw r0, -4(rp) ')
+LIMB64(`std r0, -8(rp) ')
+ addi rp, rp, -GMP_LIMB_BYTES
+ bdnz L(topS)
+ blr
+
+C Handle large cases with VMX operations
+L(big):
+ addi rp, rp, -16
+ addi up, up, -16
+ mfspr r12, 256
+ oris r0, r12, 0xf800 C Set VRSAVE bit 0-4
+ mtspr 256, r0
+
+LIMB32(`rlwinm. r7, rp, 30,30,31') C (rp >> 2) mod 4
+LIMB64(`rlwinm. r7, rp, 29,31,31') C (rp >> 3) mod 2
+ beq L(rp_aligned)
+
+ subf n, r7, n
+L(top0):
+LIMB32(`lwz r0, 12(up) ')
+LIMB64(`ld r0, 8(up) ')
+ addi up, up, -GMP_LIMB_BYTES
+LIMB32(`addic. r7, r7, -1 ')
+LIMB32(`stw r0, 12(rp) ')
+LIMB64(`std r0, 8(rp) ')
+ addi rp, rp, -GMP_LIMB_BYTES
+LIMB32(`bne L(top0) ')
+
+L(rp_aligned):
+
+LIMB32(`rlwinm. r0, up, 30,30,31') C (up >> 2) mod 4
+LIMB64(`rlwinm. r0, up, 29,31,31') C (up >> 3) mod 2
+
+LIMB64(`srdi r7, n, 2 ') C loop count corresponding to n
+LIMB32(`srwi r7, n, 3 ') C loop count corresponding to n
+ mtctr r7 C copy n to count register
+
+ li r10, -16
+
+ beq L(up_aligned)
+
+ lvsl us, 0, up
+
+ addi up, up, 16
+LIMB32(`andi. r0, n, 0x4 ')
+LIMB64(`andi. r0, n, 0x2 ')
+ beq L(1)
+ lvx v0, 0, up
+ lvx v2, r10, up
+ vperm v3, v2, v0, us
+ stvx v3, 0, rp
+ addi up, up, -32
+ addi rp, rp, -16
+ b L(lpu)
+L(1): lvx v2, 0, up
+ addi up, up, -16
+ b L(lpu)
+
+ ALIGN(32)
+L(lpu): lvx v0, 0, up
+ vperm v3, v0, v2, us
+ stvx v3, 0, rp
+ lvx v2, r10, up
+ addi up, up, -32
+ vperm v3, v2, v0, us
+ stvx v3, r10, rp
+ addi rp, rp, -32
+ bdnz L(lpu)
+
+ b L(tail)
+
+L(up_aligned):
+
+LIMB32(`andi. r0, n, 0x4 ')
+LIMB64(`andi. r0, n, 0x2 ')
+ beq L(lpa)
+ lvx v0, 0, up
+ stvx v0, 0, rp
+ addi up, up, -16
+ addi rp, rp, -16
+ b L(lpa)
+
+ ALIGN(32)
+L(lpa): lvx v0, 0, up
+ lvx v1, r10, up
+ addi up, up, -32
+ nop
+ stvx v0, 0, rp
+ stvx v1, r10, rp
+ addi rp, rp, -32
+ bdnz L(lpa)
+
+L(tail):
+LIMB32(`rlwinm. r7, n, 0,30,31 ') C r7 = n mod 4
+LIMB64(`rlwinm. r7, n, 0,31,31 ') C r7 = n mod 2
+ beq L(ret)
+LIMB32(`li r10, 12 ')
+L(top2):
+LIMB32(`lwzx r0, r10, up ')
+LIMB64(`ld r0, 8(up) ')
+LIMB32(`addic. r7, r7, -1 ')
+LIMB32(`stwx r0, r10, rp ')
+LIMB64(`std r0, 8(rp) ')
+LIMB32(`addi r10, r10, -GMP_LIMB_BYTES')
+LIMB32(`bne L(top2) ')
+
+L(ret): mtspr 256, r12
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/vmx/copyi.asm b/gmp/mpn/powerpc32/vmx/copyi.asm
new file mode 100644
index 0000000000..a97a0fa6dc
--- /dev/null
+++ b/gmp/mpn/powerpc32/vmx/copyi.asm
@@ -0,0 +1,198 @@
+dnl PowerPC-32/VMX and PowerPC-64/VMX mpn_copyi.
+
+dnl Copyright 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C 16-byte coaligned unaligned
+C cycles/limb cycles/limb
+C 7400,7410 (G4): 0.5 0.64
+C 744x,745x (G4+): 0.75 0.82
+C 970 (G5): 0.78 1.02 (64-bit limbs)
+
+C STATUS
+C * Works for all sizes and alignments.
+
+C TODO
+C * Optimize unaligned case. Some basic tests with 2-way and 4-way unrolling
+C indicate that we can reach 0.56 c/l for 7400, 0.75 c/l for 745x, and 0.80
+C c/l for 970.
+C * Consider using VMX instructions also for head and tail, by using some
+C read-modify-write tricks.
+C * The VMX code is used from the smallest sizes it handles, but measurements
+C show a large speed bump at the cutoff points. Small copying (perhaps
+C using some read-modify-write technique) should be optimized.
+C * Make a mpn_com based on this code.
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR', eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+
+ifelse(GMP_LIMB_BITS,32,`
+ define(`LIMB32',` $1')
+ define(`LIMB64',`')
+',`
+ define(`LIMB32',`')
+ define(`LIMB64',` $1')
+')
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+
+define(`us', `v4')
+
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+
+LIMB32(`cmpi cr7, n, 11 ')
+LIMB64(`cmpdi cr7, n, 5 ')
+ bge cr7, L(big)
+
+ or. r0, n, n
+ beqlr cr0
+
+C Handle small cases with plain operations
+ mtctr n
+L(topS):
+LIMB32(`lwz r0, 0(up) ')
+LIMB64(`ld r0, 0(up) ')
+ addi up, up, GMP_LIMB_BYTES
+LIMB32(`stw r0, 0(rp) ')
+LIMB64(`std r0, 0(rp) ')
+ addi rp, rp, GMP_LIMB_BYTES
+ bdnz L(topS)
+ blr
+
+C Handle large cases with VMX operations
+L(big):
+ mfspr r12, 256
+ oris r0, r12, 0xf800 C Set VRSAVE bit 0-4
+ mtspr 256, r0
+
+LIMB32(`rlwinm. r7, rp, 30,30,31') C (rp >> 2) mod 4
+LIMB64(`rlwinm. r7, rp, 29,31,31') C (rp >> 3) mod 2
+ beq L(rp_aligned)
+
+ subfic r7, r7, LIMBS_PER_VR
+ subf n, r7, n
+L(top0):
+LIMB32(`lwz r0, 0(up) ')
+LIMB64(`ld r0, 0(up) ')
+ addi up, up, GMP_LIMB_BYTES
+LIMB32(`addic. r7, r7, -1 ')
+LIMB32(`stw r0, 0(rp) ')
+LIMB64(`std r0, 0(rp) ')
+ addi rp, rp, GMP_LIMB_BYTES
+LIMB32(`bne L(top0) ')
+
+L(rp_aligned):
+
+LIMB32(`rlwinm. r0, up, 30,30,31') C (up >> 2) mod 4
+LIMB64(`rlwinm. r0, up, 29,31,31') C (up >> 3) mod 2
+
+LIMB64(`srdi r7, n, 2 ') C loop count corresponding to n
+LIMB32(`srwi r7, n, 3 ') C loop count corresponding to n
+ mtctr r7 C copy n to count register
+
+ li r10, 16
+
+ beq L(up_aligned)
+
+ lvsl us, 0, up
+
+LIMB32(`andi. r0, n, 0x4 ')
+LIMB64(`andi. r0, n, 0x2 ')
+ beq L(1)
+ lvx v0, 0, up
+ lvx v2, r10, up
+ vperm v3, v0, v2, us
+ stvx v3, 0, rp
+ addi up, up, 32
+ addi rp, rp, 16
+ b L(lpu)
+L(1): lvx v2, 0, up
+ addi up, up, 16
+ b L(lpu)
+
+ ALIGN(32)
+L(lpu): lvx v0, 0, up
+ vperm v3, v2, v0, us
+ stvx v3, 0, rp
+ lvx v2, r10, up
+ addi up, up, 32
+ vperm v3, v0, v2, us
+ stvx v3, r10, rp
+ addi rp, rp, 32
+ bdnz L(lpu)
+
+ addi up, up, -16
+ b L(tail)
+
+L(up_aligned):
+
+LIMB32(`andi. r0, n, 0x4 ')
+LIMB64(`andi. r0, n, 0x2 ')
+ beq L(lpa)
+ lvx v0, 0, up
+ stvx v0, 0, rp
+ addi up, up, 16
+ addi rp, rp, 16
+ b L(lpa)
+
+ ALIGN(32)
+L(lpa): lvx v0, 0, up
+ lvx v1, r10, up
+ addi up, up, 32
+ nop
+ stvx v0, 0, rp
+ stvx v1, r10, rp
+ addi rp, rp, 32
+ bdnz L(lpa)
+
+L(tail):
+LIMB32(`rlwinm. r7, n, 0,30,31 ') C r7 = n mod 4
+LIMB64(`rlwinm. r7, n, 0,31,31 ') C r7 = n mod 2
+ beq L(ret)
+LIMB32(`li r10, 0 ')
+L(top2):
+LIMB32(`lwzx r0, r10, up ')
+LIMB64(`ld r0, 0(up) ')
+LIMB32(`addic. r7, r7, -1 ')
+LIMB32(`stwx r0, r10, rp ')
+LIMB64(`std r0, 0(rp) ')
+LIMB32(`addi r10, r10, GMP_LIMB_BYTES')
+LIMB32(`bne L(top2) ')
+
+L(ret): mtspr 256, r12
+ blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/vmx/logops_n.asm b/gmp/mpn/powerpc32/vmx/logops_n.asm
new file mode 100644
index 0000000000..d656d3b73f
--- /dev/null
+++ b/gmp/mpn/powerpc32/vmx/logops_n.asm
@@ -0,0 +1,310 @@
+dnl PowerPC-32/VMX and PowerPC-64/VMX mpn_and_n, mpn_andn_n, mpn_nand_n,
+dnl mpn_ior_n, mpn_iorn_n, mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise
+dnl logical operations.
+
+dnl Copyright 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C and,ior,andn,nior,xor iorn,xnor nand
+C cycles/limb cycles/limb cycles/limb
+C 7400,7410 (G4): 1.39 ? ?
+C 744x,745x (G4+): 1.14 1.39 1.39
+C 970: 1.7 2.0 2.0
+
+C STATUS
+C * Works for all sizes and alignment for 32-bit limbs.
+C * Works for n >= 4 for 64-bit limbs; untested for smaller operands.
+C * Current performance makes this pointless for 970
+
+C TODO
+C * Might want to make variants when just one of the source operands needs
+C vperm, and when neither needs it. The latter runs 50% faster on 7400.
+C * Idea: If the source operands are equally aligned, we could do the logops
+C first, then vperm before storing! That means we never need more than one
+C vperm, ever!
+C * Perhaps align `rp' after initial alignment loop?
+C * Instead of having scalar code in the beginning and end, consider using
+C read-modify-write vector code.
+C * Software pipeline? Hopefully not too important, this is hairy enough
+C already.
+C * At least be more clever about operand loading, i.e., load v operands before
+C u operands, since v operands are sometimes negated.
+
+define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
+define(`LIMBS_PER_VR', eval(16/GMP_LIMB_BYTES))
+define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
+
+define(`vnegb', `') C default neg-before to null
+define(`vnega', `') C default neg-before to null
+
+ifdef(`OPERATION_and_n',
+` define(`func', `mpn_and_n')
+ define(`logopS',`and $1,$2,$3')
+ define(`logop', `vand $1,$2,$3')')
+ifdef(`OPERATION_andn_n',
+` define(`func', `mpn_andn_n')
+ define(`logopS',`andc $1,$2,$3')
+ define(`logop', `vandc $1,$2,$3')')
+ifdef(`OPERATION_nand_n',
+` define(`func', `mpn_nand_n')
+ define(`logopS',`nand $1,$2,$3')
+ define(`logop', `vand $1,$2,$3')
+ define(`vnega', `vnor $1,$2,$2')')
+ifdef(`OPERATION_ior_n',
+` define(`func', `mpn_ior_n')
+ define(`logopS',`or $1,$2,$3')
+ define(`logop', `vor $1,$2,$3')')
+ifdef(`OPERATION_iorn_n',
+` define(`func', `mpn_iorn_n')
+ define(`logopS',`orc $1,$2,$3')
+ define(`vnegb', `vnor $1,$2,$2')
+ define(`logop', `vor $1,$2,$3')')
+ifdef(`OPERATION_nior_n',
+` define(`func', `mpn_nior_n')
+ define(`logopS',`nor $1,$2,$3')
+ define(`logop', `vnor $1,$2,$3')')
+ifdef(`OPERATION_xor_n',
+` define(`func', `mpn_xor_n')
+ define(`logopS',`xor $1,$2,$3')
+ define(`logop', `vxor $1,$2,$3')')
+ifdef(`OPERATION_xnor_n',
+` define(`func',`mpn_xnor_n')
+ define(`logopS',`eqv $1,$2,$3')
+ define(`vnegb', `vnor $1,$2,$2')
+ define(`logop', `vxor $1,$2,$3')')
+
+ifelse(GMP_LIMB_BITS,`32',`
+ define(`LIMB32',` $1')
+ define(`LIMB64',`')
+',`
+ define(`LIMB32',`')
+ define(`LIMB64',` $1')
+')
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n', `r6')
+
+define(`us', `v8')
+define(`vs', `v9')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+
+LIMB32(`cmpwi cr0, n, 8 ')
+LIMB64(`cmpdi cr0, n, 4 ')
+ bge L(big)
+
+ mtctr n
+
+LIMB32(`lwz r8, 0(up) ')
+LIMB32(`lwz r9, 0(vp) ')
+LIMB32(`logopS( r0, r8, r9) ')
+LIMB32(`stw r0, 0(rp) ')
+LIMB32(`bdz L(endS) ')
+
+L(topS):
+LIMB32(`lwzu r8, 4(up) ')
+LIMB64(`ld r8, 0(up) ')
+LIMB64(`addi up, up, GMP_LIMB_BYTES ')
+LIMB32(`lwzu r9, 4(vp) ')
+LIMB64(`ld r9, 0(vp) ')
+LIMB64(`addi vp, vp, GMP_LIMB_BYTES ')
+ logopS( r0, r8, r9)
+LIMB32(`stwu r0, 4(rp) ')
+LIMB64(`std r0, 0(rp) ')
+LIMB64(`addi rp, rp, GMP_LIMB_BYTES ')
+ bdnz L(topS)
+L(endS):
+ blr
+
+L(big): mfspr r12, 256
+ oris r0, r12, 0xfffc C Set VRSAVE bit 0-13 FIXME
+ mtspr 256, r0
+
+C First loop until the destination is 16-byte aligned. This will execute 0 or 1
+C times for 64-bit machines, and 0 to 3 times for 32-bit machines.
+
+LIMB32(`rlwinm. r0, rp, 30,30,31') C (rp >> 2) mod 4
+LIMB64(`rlwinm. r0, rp, 29,31,31') C (rp >> 3) mod 2
+ beq L(aligned)
+
+ subfic r7, r0, LIMBS_PER_VR
+LIMB32(`li r10, 0 ')
+ subf n, r7, n
+L(top0):
+LIMB32(`lwz r8, 0(up) ')
+LIMB64(`ld r8, 0(up) ')
+ addi up, up, GMP_LIMB_BYTES
+LIMB32(`lwz r9, 0(vp) ')
+LIMB64(`ld r9, 0(vp) ')
+ addi vp, vp, GMP_LIMB_BYTES
+LIMB32(`addic. r7, r7, -1 ')
+ logopS( r0, r8, r9)
+LIMB32(`stwx r0, r10, rp ')
+LIMB64(`std r0, 0(rp) ')
+LIMB32(`addi r10, r10, GMP_LIMB_BYTES')
+LIMB32(`bne L(top0) ')
+
+ addi rp, rp, 16 C update rp, but preserve its alignment
+
+L(aligned):
+LIMB64(`srdi r7, n, 1 ') C loop count corresponding to n
+LIMB32(`srwi r7, n, 2 ') C loop count corresponding to n
+ mtctr r7 C copy n to count register
+
+ li r10, 16
+ lvsl us, 0, up
+ lvsl vs, 0, vp
+
+ lvx v2, 0, up
+ lvx v3, 0, vp
+ bdnz L(gt1)
+ lvx v0, r10, up
+ lvx v1, r10, vp
+ vperm v4, v2, v0, us
+ vperm v5, v3, v1, vs
+ vnegb( v5, v5)
+ logop( v6, v4, v5)
+ vnega( v6, v6)
+ stvx v6, 0, rp
+ addi up, up, 16
+ addi vp, vp, 16
+ addi rp, rp, 4
+ b L(tail)
+
+L(gt1): addi up, up, 16
+ addi vp, vp, 16
+
+L(top): lvx v0, 0, up
+ lvx v1, 0, vp
+ vperm v4, v2, v0, us
+ vperm v5, v3, v1, vs
+ vnegb( v5, v5)
+ logop( v6, v4, v5)
+ vnega( v6, v6)
+ stvx v6, 0, rp
+ bdz L(end)
+ lvx v2, r10, up
+ lvx v3, r10, vp
+ vperm v4, v0, v2, us
+ vperm v5, v1, v3, vs
+ vnegb( v5, v5)
+ logop( v6, v4, v5)
+ vnega( v6, v6)
+ stvx v6, r10, rp
+ addi up, up, 32
+ addi vp, vp, 32
+ addi rp, rp, 32
+ bdnz L(top)
+
+ andi. r0, up, 15
+ vxor v0, v0, v0
+ beq 1f
+ lvx v0, 0, up
+1: andi. r0, vp, 15
+ vxor v1, v1, v1
+ beq 1f
+ lvx v1, 0, vp
+1: vperm v4, v2, v0, us
+ vperm v5, v3, v1, vs
+ vnegb( v5, v5)
+ logop( v6, v4, v5)
+ vnega( v6, v6)
+ stvx v6, 0, rp
+ addi rp, rp, 4
+ b L(tail)
+
+L(end): andi. r0, up, 15
+ vxor v2, v2, v2
+ beq 1f
+ lvx v2, r10, up
+1: andi. r0, vp, 15
+ vxor v3, v3, v3
+ beq 1f
+ lvx v3, r10, vp
+1: vperm v4, v0, v2, us
+ vperm v5, v1, v3, vs
+ vnegb( v5, v5)
+ logop( v6, v4, v5)
+ vnega( v6, v6)
+ stvx v6, r10, rp
+
+ addi up, up, 16
+ addi vp, vp, 16
+ addi rp, rp, 20
+
+L(tail):
+LIMB32(`rlwinm. r7, n, 0,30,31 ') C r7 = n mod 4
+LIMB64(`rlwinm. r7, n, 0,31,31 ') C r7 = n mod 2
+ beq L(ret)
+ addi rp, rp, 15
+LIMB32(`rlwinm rp, rp, 0,0,27 ')
+LIMB64(`rldicr rp, rp, 0,59 ')
+ li r10, 0
+L(top2):
+LIMB32(`lwzx r8, r10, up ')
+LIMB64(`ldx r8, r10, up ')
+LIMB32(`lwzx r9, r10, vp ')
+LIMB64(`ldx r9, r10, vp ')
+LIMB32(`addic. r7, r7, -1 ')
+ logopS( r0, r8, r9)
+LIMB32(`stwx r0, r10, rp ')
+LIMB64(`std r0, 0(rp) ')
+LIMB32(`addi r10, r10, GMP_LIMB_BYTES')
+LIMB32(`bne L(top2) ')
+
+L(ret): mtspr 256, r12
+ blr
+EPILOGUE()
+
+C This works for 64-bit PowerPC, since a limb ptr can only be aligned
+C in 2 relevant ways, which means we can always find a pair of aligned
+C pointers of rp, up, and vp.
+C process words until rp is 16-byte aligned
+C if (((up | vp) & 15) == 0)
+C process with VMX without any vperm
+C else if ((up & 15) != 0 && (vp & 15) != 0)
+C process with VMX using vperm on store data
+C else if ((up & 15) != 0)
+C process with VMX using vperm on up data
+C else
+C process with VMX using vperm on vp data
+C
+C rlwinm, r0, up, 0,28,31
+C rlwinm r0, vp, 0,28,31
+C cmpwi cr7, r0, 0
+C cror cr6, cr0, cr7
+C crand cr0, cr0, cr7
diff --git a/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm b/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm
new file mode 100644
index 0000000000..9b7e4f1a50
--- /dev/null
+++ b/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm
@@ -0,0 +1,386 @@
+dnl PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
+
+dnl Copyright 2002, 2003, 2005-2007, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 603e: -
+C 604e: -
+C 75x (G3): -
+C 7400,7410 (G4): 1 simple load-use scheduling results in 0.75
+C 744x,745x (G4+): 0.75
+C ppc970: 0.75
+C power4: -
+C power5: -
+
+C TODO
+C * Either start using the low-end masking constants, or remove them.
+C * Merge multiple feed-in cases into a parameterized code block.
+C * Reduce register usage. It should be possible to almost halve it.
+
+define(`up', `r3')
+define(`n', `r4')
+
+define(`a0', `v3')
+define(`a1', `v4')
+define(`a2', `v5')
+define(`c0', `v6')
+define(`c1', `v7')
+define(`c2', `v8')
+define(`z', `v9')
+define(`x0', `v10')
+define(`x1', `v11')
+define(`x2', `v12')
+define(`x3', `v13')
+define(`pv', `v14')
+define(`y0', `v0')
+define(`y1', `v1')
+define(`y2', `v2')
+define(`y3', `v15')
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+ cmpwi cr0, n, 20 C tuned cutoff point
+ bge L(large)
+
+ li r9, 0 C result accumulator
+ mulli r10, n, 0xb C 0xb = ceil(32/3)
+ srwi. r10, r10, 5 C r10 = floor(n/3), n < 32
+ beq L(small_tail)
+ mtctr r10
+ lwz r6, 0(up)
+ lwz r7, 4(up)
+ lwzu r8, 8(up)
+ subf n, r10, n
+ subf n, r10, n
+ subf n, r10, n
+ bdz L(small_end)
+
+ ALIGN(16)
+L(los): rlwinm r0, r6, 0,8,31
+ add r9, r9, r0 C add 24b from u0
+ srwi r0, r6, 24
+ lwz r6, 4(up)
+ rlwimi r0, r7, 8, 0x00ffff00 C --111100
+ add r9, r9, r0 C add 8b from u0 and 16b from u1
+ srwi r0, r7, 16
+ lwz r7, 8(up)
+ rlwimi r0, r8, 16, 0x00ff0000 C --221111
+ add r9, r9, r0 C add 16b from u1 and 8b from u2
+ srwi r0, r8, 8 C --222222
+ lwzu r8, 12(up)
+ add r9, r9, r0 C add 24b from u2
+ bdnz L(los)
+L(small_end):
+ rlwinm r0, r6, 0,8,31
+ add r9, r9, r0 C add 24b from u0
+ srwi r0, r6, 24
+ rlwimi r0, r7, 8, 0x00ffff00 C --111100
+ add r9, r9, r0 C add 8b from u0 and 16b from u1
+ srwi r0, r7, 16
+ rlwimi r0, r8, 16, 0x00ff0000 C --221111
+ add r9, r9, r0 C add 16b from u1 and 8b from u2
+ srwi r0, r8, 8 C --222222
+ add r9, r9, r0 C add 24b from u2
+
+ addi up, up, 4
+ rlwinm r0, r9, 0,8,31
+ srwi r9, r9, 24
+ add r9, r9, r0
+
+L(small_tail):
+ cmpi cr0, n, 1
+ blt L(ret)
+
+ lwz r6, 0(up)
+ rlwinm r0, r6, 0,8,31
+ srwi r6, r6, 24
+ add r9, r9, r0
+ add r9, r9, r6
+
+ beq L(ret)
+
+ lwz r6, 4(up)
+ rlwinm r0, r6, 8,8,23
+ srwi r6, r6, 16
+ add r9, r9, r0
+ add r9, r9, r6
+
+L(ret): mr r3, r9
+ blr
+
+
+L(large):
+ mfspr r10, 256
+ oris r0, r10, 0xffff C Set VRSAVE bit 0-15
+ mtspr 256, r0
+
+ andi. r7, up, 15
+ vxor a0, v0, v0
+ lis r9, 0xaaaa
+ vxor a1, v0, v0
+ ori r9, r9, 0xaaab
+ vxor a2, v0, v0
+ li r5, 16
+ vxor c0, v0, v0
+ li r6, 32
+ vxor c1, v0, v0
+ LEAL( r11, cnsts) C CAUTION clobbers r0 for elf, darwin
+ vxor c2, v0, v0
+ vxor z, v0, v0
+
+ beq L(aligned16)
+
+ cmpwi cr7, r7, 8
+ bge cr7, L(na4)
+
+ lvx a2, 0, up
+ addi up, up, 16
+ vsldoi a2, a2, z, 4
+ vsldoi a2, z, a2, 12
+
+ addi n, n, 9
+ mulhwu r0, n, r9
+ srwi r0, r0, 3 C r0 = floor(n/12)
+ mtctr r0
+
+ mulli r8, r0, 12
+ subf n, r8, n
+ b L(2)
+
+L(na4): bne cr7, L(na8)
+
+ lvx a1, 0, up
+ addi up, up, -16
+ vsldoi a1, a1, z, 8
+ vsldoi a1, z, a1, 8
+
+ addi n, n, 6
+ mulhwu r0, n, r9
+ srwi r0, r0, 3 C r0 = floor(n/12)
+ mtctr r0
+
+ mulli r8, r0, 12
+ subf n, r8, n
+ b L(1)
+
+L(na8):
+ lvx a0, 0, up
+ vsldoi a0, a0, z, 12
+ vsldoi a0, z, a0, 4
+
+ addi n, n, 3
+ mulhwu r0, n, r9
+ srwi r0, r0, 3 C r0 = floor(n/12)
+ mtctr r0
+
+ mulli r8, r0, 12
+ subf n, r8, n
+ b L(0)
+
+L(aligned16):
+ mulhwu r0, n, r9
+ srwi r0, r0, 3 C r0 = floor(n/12)
+ mtctr r0
+
+ mulli r8, r0, 12
+ subf n, r8, n
+
+ lvx a0, 0, up
+L(0): lvx a1, r5, up
+L(1): lvx a2, r6, up
+ addi up, up, 48
+L(2): bdz L(end)
+ li r12, 256
+ li r9, 288
+ ALIGN(32)
+L(top):
+ lvx v0, 0, up
+ vaddcuw v10, a0, v0
+ vadduwm a0, a0, v0
+ vadduwm c0, c0, v10
+
+ lvx v1, r5, up
+ vaddcuw v10, a1, v1
+ vadduwm a1, a1, v1
+ vadduwm c1, c1, v10
+
+ lvx v2, r6, up
+ dcbt up, r12
+ dcbt up, r9
+ addi up, up, 48
+ vaddcuw v10, a2, v2
+ vadduwm a2, a2, v2
+ vadduwm c2, c2, v10
+ bdnz L(top)
+
+L(end):
+C n = 0...11
+ cmpwi cr0, n, 0
+ beq L(sum)
+ cmpwi cr0, n, 4
+ ble L(tail.1..4)
+ cmpwi cr0, n, 8
+ ble L(tail.5..8)
+
+L(tail.9..11):
+ lvx v0, 0, up
+ vaddcuw v10, a0, v0
+ vadduwm a0, a0, v0
+ vadduwm c0, c0, v10
+
+ lvx v1, r5, up
+ vaddcuw v10, a1, v1
+ vadduwm a1, a1, v1
+ vadduwm c1, c1, v10
+
+ lvx v2, r6, up
+
+ addi r8, r11, 96
+ rlwinm r3, n ,4,26,27
+ lvx v11, r3, r8
+ vand v2, v2, v11
+
+ vaddcuw v10, a2, v2
+ vadduwm a2, a2, v2
+ vadduwm c2, c2, v10
+ b L(sum)
+
+L(tail.5..8):
+ lvx v0, 0, up
+ vaddcuw v10, a0, v0
+ vadduwm a0, a0, v0
+ vadduwm c0, c0, v10
+
+ lvx v1, r5, up
+
+ addi r8, r11, 96
+ rlwinm r3, n ,4,26,27
+ lvx v11, r3, r8
+ vand v1, v1, v11
+
+ vaddcuw v10, a1, v1
+ vadduwm a1, a1, v1
+ vadduwm c1, c1, v10
+ b L(sum)
+
+L(tail.1..4):
+ lvx v0, 0, up
+
+ addi r8, r11, 96
+ rlwinm r3, n ,4,26,27
+ lvx v11, r3, r8
+ vand v0, v0, v11
+
+ vaddcuw v10, a0, v0
+ vadduwm a0, a0, v0
+ vadduwm c0, c0, v10
+
+L(sum): lvx pv, 0, r11
+ vperm x0, a0, z, pv C extract 4 24-bit field from a0
+ vperm y0, c2, z, pv
+ lvx pv, r5, r11
+ vperm x1, a1, z, pv C extract 4 24-bit field from a1
+ vperm y1, c0, z, pv C extract 4 24-bit field from a1
+ lvx pv, r6, r11
+ vperm x2, a2, z, pv C extract 4 24-bit field from a1
+ vperm y2, c1, z, pv C extract 4 24-bit field from a1
+ li r10, 48
+ lvx pv, r10, r11
+ vperm x3, a0, z, pv C extract remaining/partial a0 fields
+ vperm y3, c2, z, pv C extract remaining/partial a0 fields
+ li r10, 64
+ lvx pv, r10, r11
+ vperm x3, a1, x3, pv C insert remaining/partial a1 fields
+ vperm y3, c0, y3, pv C insert remaining/partial a1 fields
+ li r10, 80
+ lvx pv, r10, r11
+ vperm x3, a2, x3, pv C insert remaining/partial a2 fields
+ vperm y3, c1, y3, pv C insert remaining/partial a2 fields
+
+C We now have 4 128-bit accumulators to sum
+ vadduwm x0, x0, x1
+ vadduwm x2, x2, x3
+ vadduwm x0, x0, x2
+
+ vadduwm y0, y0, y1
+ vadduwm y2, y2, y3
+ vadduwm y0, y0, y2
+
+ vadduwm x0, x0, y0
+
+C Reduce 32-bit fields
+ vsumsws x0, x0, z
+
+ li r7, -16 C FIXME: does all ppc32 ABIs...
+ stvx x0, r7, r1 C FIXME: ...support storing below sp?
+ lwz r3, -4(r1)
+
+ mtspr 256, r10
+ blr
+EPILOGUE()
+
+C load | v0 | v1 | v2 |
+C acc | a0 | a1 | a2 |
+C carry | c0 | c1 | c2 |
+C | 0 1 2 3 | 4 5 6 7 | 8 9 10 11 | 128
+C |---|---|---|---|---|---|---|---|---|---|---|---| 32
+C | | | | | | | | | | | | | | | | | 24
+C | | | | | | | | | 48
+
+C $---------------$---------------$---------------$---------------$
+C | . . . . . . . . . . . . . . . |
+C |_______________________________________________________________|
+C | | | | | | |
+C <-hi16-> <--- 24 --> <--- 24 --> <--- 24 --> <--- 24 --> <-lo16->
+
+
+DEF_OBJECT(cnsts,16)
+C Permutation vectors in the order they are used above
+C # 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
+ .byte 0x10,0x01,0x02,0x03, 0x10,0x06,0x07,0x00, 0x10,0x0b,0x04,0x05, 0x10,0x08,0x09,0x0a C a0
+ .byte 0x10,0x07,0x00,0x01, 0x10,0x04,0x05,0x06, 0x10,0x09,0x0a,0x0b, 0x10,0x0e,0x0f,0x08 C a1
+ .byte 0x10,0x00,0x01,0x02, 0x10,0x05,0x06,0x07, 0x10,0x0a,0x0b,0x04, 0x10,0x0f,0x08,0x09 C a2
+ .byte 0x10,0x0d,0x0e,0x0f, 0x10,0x10,0x10,0x0c, 0x10,0x10,0x10,0x10, 0x10,0x10,0x10,0x10 C part a0
+ .byte 0x10,0x11,0x12,0x13, 0x10,0x02,0x03,0x17, 0x10,0x10,0x0c,0x0d, 0x10,0x10,0x10,0x10 C part a1
+ .byte 0x10,0x11,0x12,0x13, 0x10,0x15,0x16,0x17, 0x10,0x03,0x1a,0x1b, 0x10,0x0c,0x0d,0x0e C part a2
+C Masks for high end of number
+ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+ .byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+C Masks for low end of number
+C .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+C .byte 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+C .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+C .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff
+END_OBJECT(cnsts)
diff --git a/gmp/mpn/powerpc32/vmx/popcount.asm b/gmp/mpn/powerpc32/vmx/popcount.asm
new file mode 100644
index 0000000000..943c92d127
--- /dev/null
+++ b/gmp/mpn/powerpc32/vmx/popcount.asm
@@ -0,0 +1,34 @@
+dnl PowerPC-32/VMX mpn_popcount.
+
+dnl Copyright 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`powerpc64/vmx/popcount.asm')