summaryrefslogtreecommitdiff
path: root/rts/gmp
diff options
context:
space:
mode:
Diffstat (limited to 'rts/gmp')
-rw-r--r--rts/gmp/.gdbinit34
-rw-r--r--rts/gmp/AUTHORS12
-rw-r--r--rts/gmp/COPYING336
-rw-r--r--rts/gmp/COPYING.LIB515
-rw-r--r--rts/gmp/INSTALL146
-rw-r--r--rts/gmp/Makefile.am197
-rw-r--r--rts/gmp/Makefile.in932
-rw-r--r--rts/gmp/NEWS136
-rw-r--r--rts/gmp/README84
-rw-r--r--rts/gmp/acconfig.h92
-rw-r--r--rts/gmp/acinclude.m4835
-rw-r--r--rts/gmp/aclocal.m41963
-rw-r--r--rts/gmp/ansi2knr.136
-rw-r--r--rts/gmp/ansi2knr.c677
-rw-r--r--rts/gmp/assert.c52
-rw-r--r--rts/gmp/compat.c46
-rw-r--r--rts/gmp/config.guess1373
-rw-r--r--rts/gmp/config.in162
-rw-r--r--rts/gmp/config.sub1273
-rw-r--r--rts/gmp/configure5216
-rw-r--r--rts/gmp/configure.in950
-rw-r--r--rts/gmp/depcomp269
-rw-r--r--rts/gmp/errno.c26
-rw-r--r--rts/gmp/extract-dbl.c187
-rw-r--r--rts/gmp/gmp-impl.h1072
-rw-r--r--rts/gmp/gmp.h1083
-rw-r--r--rts/gmp/insert-dbl.c98
-rw-r--r--rts/gmp/install-sh251
-rw-r--r--rts/gmp/longlong.h1347
-rw-r--r--rts/gmp/ltconfig3109
-rw-r--r--rts/gmp/ltmain.sh4692
-rw-r--r--rts/gmp/mdate-sh92
-rw-r--r--rts/gmp/memory.c160
-rw-r--r--rts/gmp/missing244
-rw-r--r--rts/gmp/mkinstalldirs38
-rw-r--r--rts/gmp/mp.h124
-rw-r--r--rts/gmp/mp_bpl.c27
-rw-r--r--rts/gmp/mp_clz_tab.c36
-rw-r--r--rts/gmp/mp_minv_tab.c50
-rw-r--r--rts/gmp/mp_set_fns.c48
-rw-r--r--rts/gmp/mpn/Makefile.am94
-rw-r--r--rts/gmp/mpn/Makefile.in472
-rw-r--r--rts/gmp/mpn/README13
-rw-r--r--rts/gmp/mpn/a29k/add_n.s120
-rw-r--r--rts/gmp/mpn/a29k/addmul_1.s113
-rw-r--r--rts/gmp/mpn/a29k/lshift.s93
-rw-r--r--rts/gmp/mpn/a29k/mul_1.s97
-rw-r--r--rts/gmp/mpn/a29k/rshift.s89
-rw-r--r--rts/gmp/mpn/a29k/sub_n.s120
-rw-r--r--rts/gmp/mpn/a29k/submul_1.s116
-rw-r--r--rts/gmp/mpn/a29k/udiv.s30
-rw-r--r--rts/gmp/mpn/a29k/umul.s29
-rw-r--r--rts/gmp/mpn/alpha/README224
-rw-r--r--rts/gmp/mpn/alpha/add_n.asm114
-rw-r--r--rts/gmp/mpn/alpha/addmul_1.asm87
-rw-r--r--rts/gmp/mpn/alpha/cntlz.asm68
-rw-r--r--rts/gmp/mpn/alpha/default.m477
-rw-r--r--rts/gmp/mpn/alpha/ev5/add_n.asm143
-rw-r--r--rts/gmp/mpn/alpha/ev5/lshift.asm169
-rw-r--r--rts/gmp/mpn/alpha/ev5/rshift.asm167
-rw-r--r--rts/gmp/mpn/alpha/ev5/sub_n.asm143
-rw-r--r--rts/gmp/mpn/alpha/ev6/addmul_1.asm474
-rw-r--r--rts/gmp/mpn/alpha/ev6/gmp-mparam.h62
-rw-r--r--rts/gmp/mpn/alpha/gmp-mparam.h64
-rw-r--r--rts/gmp/mpn/alpha/invert_limb.asm345
-rw-r--r--rts/gmp/mpn/alpha/lshift.asm104
-rw-r--r--rts/gmp/mpn/alpha/mul_1.asm71
-rw-r--r--rts/gmp/mpn/alpha/rshift.asm102
-rw-r--r--rts/gmp/mpn/alpha/sub_n.asm114
-rw-r--r--rts/gmp/mpn/alpha/submul_1.asm87
-rw-r--r--rts/gmp/mpn/alpha/udiv_qrnnd.S151
-rw-r--r--rts/gmp/mpn/alpha/umul.asm39
-rw-r--r--rts/gmp/mpn/alpha/unicos.m463
-rw-r--r--rts/gmp/mpn/arm/add_n.S77
-rw-r--r--rts/gmp/mpn/arm/addmul_1.S89
-rw-r--r--rts/gmp/mpn/arm/gmp-mparam.h34
-rw-r--r--rts/gmp/mpn/arm/mul_1.S81
-rw-r--r--rts/gmp/mpn/arm/sub_n.S79
-rw-r--r--rts/gmp/mpn/asm-defs.m41182
-rw-r--r--rts/gmp/mpn/clipper/add_n.s48
-rw-r--r--rts/gmp/mpn/clipper/mul_1.s47
-rw-r--r--rts/gmp/mpn/clipper/sub_n.s48
-rw-r--r--rts/gmp/mpn/cray/README14
-rw-r--r--rts/gmp/mpn/cray/add_n.c96
-rw-r--r--rts/gmp/mpn/cray/addmul_1.c46
-rw-r--r--rts/gmp/mpn/cray/gmp-mparam.h27
-rw-r--r--rts/gmp/mpn/cray/mul_1.c44
-rw-r--r--rts/gmp/mpn/cray/mulww.f54
-rw-r--r--rts/gmp/mpn/cray/mulww.s245
-rw-r--r--rts/gmp/mpn/cray/sub_n.c97
-rw-r--r--rts/gmp/mpn/cray/submul_1.c46
-rw-r--r--rts/gmp/mpn/generic/add_n.c62
-rw-r--r--rts/gmp/mpn/generic/addmul_1.c65
-rw-r--r--rts/gmp/mpn/generic/addsub_n.c167
-rw-r--r--rts/gmp/mpn/generic/bdivmod.c120
-rw-r--r--rts/gmp/mpn/generic/bz_divrem_n.c153
-rw-r--r--rts/gmp/mpn/generic/cmp.c56
-rw-r--r--rts/gmp/mpn/generic/diveby3.c77
-rw-r--r--rts/gmp/mpn/generic/divrem.c101
-rw-r--r--rts/gmp/mpn/generic/divrem_1.c248
-rw-r--r--rts/gmp/mpn/generic/divrem_2.c151
-rw-r--r--rts/gmp/mpn/generic/dump.c76
-rw-r--r--rts/gmp/mpn/generic/gcd.c414
-rw-r--r--rts/gmp/mpn/generic/gcd_1.c77
-rw-r--r--rts/gmp/mpn/generic/gcdext.c700
-rw-r--r--rts/gmp/mpn/generic/get_str.c216
-rw-r--r--rts/gmp/mpn/generic/gmp-mparam.h27
-rw-r--r--rts/gmp/mpn/generic/hamdist.c94
-rw-r--r--rts/gmp/mpn/generic/inlines.c24
-rw-r--r--rts/gmp/mpn/generic/jacbase.c136
-rw-r--r--rts/gmp/mpn/generic/lshift.c87
-rw-r--r--rts/gmp/mpn/generic/mod_1.c175
-rw-r--r--rts/gmp/mpn/generic/mod_1_rs.c111
-rw-r--r--rts/gmp/mpn/generic/mul.c190
-rw-r--r--rts/gmp/mpn/generic/mul_1.c59
-rw-r--r--rts/gmp/mpn/generic/mul_basecase.c87
-rw-r--r--rts/gmp/mpn/generic/mul_fft.c772
-rw-r--r--rts/gmp/mpn/generic/mul_n.c1343
-rw-r--r--rts/gmp/mpn/generic/perfsqr.c123
-rw-r--r--rts/gmp/mpn/generic/popcount.c93
-rw-r--r--rts/gmp/mpn/generic/pre_mod_1.c69
-rw-r--r--rts/gmp/mpn/generic/random.c43
-rw-r--r--rts/gmp/mpn/generic/random2.c105
-rw-r--r--rts/gmp/mpn/generic/rshift.c88
-rw-r--r--rts/gmp/mpn/generic/sb_divrem_mn.c201
-rw-r--r--rts/gmp/mpn/generic/scan0.c62
-rw-r--r--rts/gmp/mpn/generic/scan1.c62
-rw-r--r--rts/gmp/mpn/generic/set_str.c159
-rw-r--r--rts/gmp/mpn/generic/sqr_basecase.c83
-rw-r--r--rts/gmp/mpn/generic/sqrtrem.c509
-rw-r--r--rts/gmp/mpn/generic/sub_n.c62
-rw-r--r--rts/gmp/mpn/generic/submul_1.c65
-rw-r--r--rts/gmp/mpn/generic/tdiv_qr.c401
-rw-r--r--rts/gmp/mpn/generic/udiv_w_sdiv.c131
-rw-r--r--rts/gmp/mpn/hppa/README91
-rw-r--r--rts/gmp/mpn/hppa/add_n.s58
-rw-r--r--rts/gmp/mpn/hppa/gmp-mparam.h63
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/addmul_1.s102
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/mul_1.s98
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s75
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S189
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s83
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s80
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s76
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S195
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/submul_1.s111
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S80
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/umul.s42
-rw-r--r--rts/gmp/mpn/hppa/hppa2_0/add_n.s88
-rw-r--r--rts/gmp/mpn/hppa/hppa2_0/sub_n.s88
-rw-r--r--rts/gmp/mpn/hppa/lshift.s66
-rw-r--r--rts/gmp/mpn/hppa/rshift.s63
-rw-r--r--rts/gmp/mpn/hppa/sub_n.s59
-rw-r--r--rts/gmp/mpn/hppa/udiv_qrnnd.s286
-rw-r--r--rts/gmp/mpn/i960/README9
-rw-r--r--rts/gmp/mpn/i960/add_n.s43
-rw-r--r--rts/gmp/mpn/i960/addmul_1.s48
-rw-r--r--rts/gmp/mpn/i960/mul_1.s45
-rw-r--r--rts/gmp/mpn/i960/sub_n.s43
-rw-r--r--rts/gmp/mpn/lisp/gmpasm-mode.el351
-rw-r--r--rts/gmp/mpn/m68k/add_n.S79
-rw-r--r--rts/gmp/mpn/m68k/lshift.S150
-rw-r--r--rts/gmp/mpn/m68k/mc68020/addmul_1.S83
-rw-r--r--rts/gmp/mpn/m68k/mc68020/mul_1.S90
-rw-r--r--rts/gmp/mpn/m68k/mc68020/submul_1.S83
-rw-r--r--rts/gmp/mpn/m68k/mc68020/udiv.S31
-rw-r--r--rts/gmp/mpn/m68k/mc68020/umul.S31
-rw-r--r--rts/gmp/mpn/m68k/rshift.S149
-rw-r--r--rts/gmp/mpn/m68k/sub_n.S79
-rw-r--r--rts/gmp/mpn/m68k/syntax.h177
-rw-r--r--rts/gmp/mpn/m88k/add_n.s104
-rw-r--r--rts/gmp/mpn/m88k/mc88110/add_n.S200
-rw-r--r--rts/gmp/mpn/m88k/mc88110/addmul_1.s61
-rw-r--r--rts/gmp/mpn/m88k/mc88110/mul_1.s59
-rw-r--r--rts/gmp/mpn/m88k/mc88110/sub_n.S276
-rw-r--r--rts/gmp/mpn/m88k/mul_1.s127
-rw-r--r--rts/gmp/mpn/m88k/sub_n.s106
-rw-r--r--rts/gmp/mpn/mips2/add_n.s120
-rw-r--r--rts/gmp/mpn/mips2/addmul_1.s97
-rw-r--r--rts/gmp/mpn/mips2/lshift.s95
-rw-r--r--rts/gmp/mpn/mips2/mul_1.s85
-rw-r--r--rts/gmp/mpn/mips2/rshift.s92
-rw-r--r--rts/gmp/mpn/mips2/sub_n.s120
-rw-r--r--rts/gmp/mpn/mips2/submul_1.s97
-rw-r--r--rts/gmp/mpn/mips2/umul.s30
-rw-r--r--rts/gmp/mpn/mips3/README23
-rw-r--r--rts/gmp/mpn/mips3/add_n.s120
-rw-r--r--rts/gmp/mpn/mips3/addmul_1.s97
-rw-r--r--rts/gmp/mpn/mips3/gmp-mparam.h58
-rw-r--r--rts/gmp/mpn/mips3/lshift.s95
-rw-r--r--rts/gmp/mpn/mips3/mul_1.s85
-rw-r--r--rts/gmp/mpn/mips3/rshift.s92
-rw-r--r--rts/gmp/mpn/mips3/sub_n.s120
-rw-r--r--rts/gmp/mpn/mips3/submul_1.s97
-rw-r--r--rts/gmp/mpn/mp_bases.c550
-rw-r--r--rts/gmp/mpn/ns32k/add_n.s46
-rw-r--r--rts/gmp/mpn/ns32k/addmul_1.s48
-rw-r--r--rts/gmp/mpn/ns32k/mul_1.s47
-rw-r--r--rts/gmp/mpn/ns32k/sub_n.s46
-rw-r--r--rts/gmp/mpn/ns32k/submul_1.s48
-rw-r--r--rts/gmp/mpn/pa64/README38
-rw-r--r--rts/gmp/mpn/pa64/add_n.s90
-rw-r--r--rts/gmp/mpn/pa64/addmul_1.S167
-rw-r--r--rts/gmp/mpn/pa64/gmp-mparam.h65
-rw-r--r--rts/gmp/mpn/pa64/lshift.s103
-rw-r--r--rts/gmp/mpn/pa64/mul_1.S158
-rw-r--r--rts/gmp/mpn/pa64/rshift.s100
-rw-r--r--rts/gmp/mpn/pa64/sub_n.s90
-rw-r--r--rts/gmp/mpn/pa64/submul_1.S170
-rw-r--r--rts/gmp/mpn/pa64/udiv_qrnnd.c111
-rw-r--r--rts/gmp/mpn/pa64/umul_ppmm.S74
-rw-r--r--rts/gmp/mpn/pa64w/README2
-rw-r--r--rts/gmp/mpn/pa64w/add_n.s90
-rw-r--r--rts/gmp/mpn/pa64w/addmul_1.S168
-rw-r--r--rts/gmp/mpn/pa64w/gmp-mparam.h65
-rw-r--r--rts/gmp/mpn/pa64w/lshift.s103
-rw-r--r--rts/gmp/mpn/pa64w/mul_1.S159
-rw-r--r--rts/gmp/mpn/pa64w/rshift.s100
-rw-r--r--rts/gmp/mpn/pa64w/sub_n.s90
-rw-r--r--rts/gmp/mpn/pa64w/submul_1.S171
-rw-r--r--rts/gmp/mpn/pa64w/udiv_qrnnd.c117
-rw-r--r--rts/gmp/mpn/pa64w/umul_ppmm.S72
-rw-r--r--rts/gmp/mpn/power/add_n.s79
-rw-r--r--rts/gmp/mpn/power/addmul_1.s122
-rw-r--r--rts/gmp/mpn/power/lshift.s56
-rw-r--r--rts/gmp/mpn/power/mul_1.s109
-rw-r--r--rts/gmp/mpn/power/rshift.s54
-rw-r--r--rts/gmp/mpn/power/sdiv.s34
-rw-r--r--rts/gmp/mpn/power/sub_n.s80
-rw-r--r--rts/gmp/mpn/power/submul_1.s127
-rw-r--r--rts/gmp/mpn/power/umul.s38
-rw-r--r--rts/gmp/mpn/powerpc32/add_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc32/addmul_1.asm124
-rw-r--r--rts/gmp/mpn/powerpc32/aix.m439
-rw-r--r--rts/gmp/mpn/powerpc32/gmp-mparam.h66
-rw-r--r--rts/gmp/mpn/powerpc32/lshift.asm145
-rw-r--r--rts/gmp/mpn/powerpc32/mul_1.asm86
-rw-r--r--rts/gmp/mpn/powerpc32/regmap.m434
-rw-r--r--rts/gmp/mpn/powerpc32/rshift.asm60
-rw-r--r--rts/gmp/mpn/powerpc32/sub_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc32/submul_1.asm130
-rw-r--r--rts/gmp/mpn/powerpc32/umul.asm32
-rw-r--r--rts/gmp/mpn/powerpc64/README36
-rw-r--r--rts/gmp/mpn/powerpc64/add_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc64/addmul_1.asm52
-rw-r--r--rts/gmp/mpn/powerpc64/addsub_n.asm107
-rw-r--r--rts/gmp/mpn/powerpc64/aix.m440
-rw-r--r--rts/gmp/mpn/powerpc64/copyd.asm45
-rw-r--r--rts/gmp/mpn/powerpc64/copyi.asm44
-rw-r--r--rts/gmp/mpn/powerpc64/gmp-mparam.h62
-rw-r--r--rts/gmp/mpn/powerpc64/lshift.asm159
-rw-r--r--rts/gmp/mpn/powerpc64/mul_1.asm49
-rw-r--r--rts/gmp/mpn/powerpc64/rshift.asm60
-rw-r--r--rts/gmp/mpn/powerpc64/sub_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc64/submul_1.asm54
-rw-r--r--rts/gmp/mpn/pyr/add_n.s76
-rw-r--r--rts/gmp/mpn/pyr/addmul_1.s45
-rw-r--r--rts/gmp/mpn/pyr/mul_1.s42
-rw-r--r--rts/gmp/mpn/pyr/sub_n.s76
-rw-r--r--rts/gmp/mpn/sh/add_n.s47
-rw-r--r--rts/gmp/mpn/sh/sh2/addmul_1.s53
-rw-r--r--rts/gmp/mpn/sh/sh2/mul_1.s50
-rw-r--r--rts/gmp/mpn/sh/sh2/submul_1.s53
-rw-r--r--rts/gmp/mpn/sh/sub_n.s47
-rw-r--r--rts/gmp/mpn/sparc32/README36
-rw-r--r--rts/gmp/mpn/sparc32/add_n.asm236
-rw-r--r--rts/gmp/mpn/sparc32/addmul_1.asm146
-rw-r--r--rts/gmp/mpn/sparc32/lshift.asm97
-rw-r--r--rts/gmp/mpn/sparc32/mul_1.asm137
-rw-r--r--rts/gmp/mpn/sparc32/rshift.asm93
-rw-r--r--rts/gmp/mpn/sparc32/sub_n.asm326
-rw-r--r--rts/gmp/mpn/sparc32/submul_1.asm146
-rw-r--r--rts/gmp/mpn/sparc32/udiv_fp.asm158
-rw-r--r--rts/gmp/mpn/sparc32/udiv_nfp.asm193
-rw-r--r--rts/gmp/mpn/sparc32/umul.asm68
-rw-r--r--rts/gmp/mpn/sparc32/v8/addmul_1.asm122
-rw-r--r--rts/gmp/mpn/sparc32/v8/mul_1.asm103
-rw-r--r--rts/gmp/mpn/sparc32/v8/submul_1.asm58
-rw-r--r--rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm122
-rw-r--r--rts/gmp/mpn/sparc32/v8/umul.asm31
-rw-r--r--rts/gmp/mpn/sparc32/v9/README4
-rw-r--r--rts/gmp/mpn/sparc32/v9/addmul_1.asm288
-rw-r--r--rts/gmp/mpn/sparc32/v9/gmp-mparam.h69
-rw-r--r--rts/gmp/mpn/sparc32/v9/mul_1.asm267
-rw-r--r--rts/gmp/mpn/sparc32/v9/submul_1.asm291
-rw-r--r--rts/gmp/mpn/sparc64/README48
-rw-r--r--rts/gmp/mpn/sparc64/add_n.asm172
-rw-r--r--rts/gmp/mpn/sparc64/addmul1h.asm203
-rw-r--r--rts/gmp/mpn/sparc64/addmul_1.asm114
-rw-r--r--rts/gmp/mpn/sparc64/copyi.asm79
-rw-r--r--rts/gmp/mpn/sparc64/gmp-mparam.h88
-rw-r--r--rts/gmp/mpn/sparc64/lshift.asm97
-rw-r--r--rts/gmp/mpn/sparc64/mul_1.asm113
-rw-r--r--rts/gmp/mpn/sparc64/mul_1h.asm183
-rw-r--r--rts/gmp/mpn/sparc64/rshift.asm94
-rw-r--r--rts/gmp/mpn/sparc64/sub_n.asm172
-rw-r--r--rts/gmp/mpn/sparc64/submul1h.asm204
-rw-r--r--rts/gmp/mpn/sparc64/submul_1.asm114
-rw-r--r--rts/gmp/mpn/thumb/add_n.s50
-rw-r--r--rts/gmp/mpn/thumb/sub_n.s50
-rw-r--r--rts/gmp/mpn/underscore.h26
-rw-r--r--rts/gmp/mpn/vax/add_n.s61
-rw-r--r--rts/gmp/mpn/vax/addmul_1.s126
-rw-r--r--rts/gmp/mpn/vax/lshift.s58
-rw-r--r--rts/gmp/mpn/vax/mul_1.s123
-rw-r--r--rts/gmp/mpn/vax/rshift.s56
-rw-r--r--rts/gmp/mpn/vax/sub_n.s61
-rw-r--r--rts/gmp/mpn/vax/submul_1.s126
-rw-r--r--rts/gmp/mpn/x86/README40
-rw-r--r--rts/gmp/mpn/x86/README.family333
-rw-r--r--rts/gmp/mpn/x86/addsub_n.S174
-rw-r--r--rts/gmp/mpn/x86/aors_n.asm187
-rw-r--r--rts/gmp/mpn/x86/aorsmul_1.asm134
-rw-r--r--rts/gmp/mpn/x86/copyd.asm80
-rw-r--r--rts/gmp/mpn/x86/copyi.asm79
-rw-r--r--rts/gmp/mpn/x86/diveby3.asm115
-rw-r--r--rts/gmp/mpn/x86/divrem_1.asm232
-rw-r--r--rts/gmp/mpn/x86/k6/README237
-rw-r--r--rts/gmp/mpn/x86/k6/aors_n.asm329
-rw-r--r--rts/gmp/mpn/x86/k6/aorsmul_1.asm372
-rw-r--r--rts/gmp/mpn/x86/k6/cross.pl141
-rw-r--r--rts/gmp/mpn/x86/k6/diveby3.asm110
-rw-r--r--rts/gmp/mpn/x86/k6/gmp-mparam.h97
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/copyd.asm179
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/copyi.asm196
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/lshift.asm286
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/rshift.asm285
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/com_n.asm91
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/logops_n.asm212
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/lshift.asm122
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/popham.asm238
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/rshift.asm122
-rw-r--r--rts/gmp/mpn/x86/k6/mul_1.asm272
-rw-r--r--rts/gmp/mpn/x86/k6/mul_basecase.asm600
-rw-r--r--rts/gmp/mpn/x86/k6/sqr_basecase.asm672
-rw-r--r--rts/gmp/mpn/x86/k7/README145
-rw-r--r--rts/gmp/mpn/x86/k7/aors_n.asm250
-rw-r--r--rts/gmp/mpn/x86/k7/aorsmul_1.asm364
-rw-r--r--rts/gmp/mpn/x86/k7/diveby3.asm131
-rw-r--r--rts/gmp/mpn/x86/k7/gmp-mparam.h100
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/copyd.asm136
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/copyi.asm147
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/divrem_1.asm718
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/lshift.asm472
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/mod_1.asm457
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/popham.asm239
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/rshift.asm471
-rw-r--r--rts/gmp/mpn/x86/k7/mul_1.asm265
-rw-r--r--rts/gmp/mpn/x86/k7/mul_basecase.asm593
-rw-r--r--rts/gmp/mpn/x86/k7/sqr_basecase.asm627
-rw-r--r--rts/gmp/mpn/x86/lshift.asm90
-rw-r--r--rts/gmp/mpn/x86/mod_1.asm141
-rw-r--r--rts/gmp/mpn/x86/mul_1.asm130
-rw-r--r--rts/gmp/mpn/x86/mul_basecase.asm209
-rw-r--r--rts/gmp/mpn/x86/p6/README95
-rw-r--r--rts/gmp/mpn/x86/p6/aorsmul_1.asm300
-rw-r--r--rts/gmp/mpn/x86/p6/diveby3.asm37
-rw-r--r--rts/gmp/mpn/x86/p6/gmp-mparam.h96
-rw-r--r--rts/gmp/mpn/x86/p6/mmx/divrem_1.asm677
-rw-r--r--rts/gmp/mpn/x86/p6/mmx/mod_1.asm444
-rw-r--r--rts/gmp/mpn/x86/p6/mmx/popham.asm31
-rw-r--r--rts/gmp/mpn/x86/p6/p3mmx/popham.asm30
-rw-r--r--rts/gmp/mpn/x86/p6/sqr_basecase.asm641
-rw-r--r--rts/gmp/mpn/x86/pentium/README77
-rw-r--r--rts/gmp/mpn/x86/pentium/aors_n.asm196
-rw-r--r--rts/gmp/mpn/x86/pentium/aorsmul_1.asm99
-rw-r--r--rts/gmp/mpn/x86/pentium/diveby3.asm183
-rw-r--r--rts/gmp/mpn/x86/pentium/gmp-mparam.h97
-rw-r--r--rts/gmp/mpn/x86/pentium/lshift.asm236
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h97
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/lshift.asm455
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/popham.asm30
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/rshift.asm460
-rw-r--r--rts/gmp/mpn/x86/pentium/mul_1.asm79
-rw-r--r--rts/gmp/mpn/x86/pentium/mul_basecase.asm135
-rw-r--r--rts/gmp/mpn/x86/pentium/rshift.asm236
-rw-r--r--rts/gmp/mpn/x86/pentium/sqr_basecase.asm520
-rw-r--r--rts/gmp/mpn/x86/rshift.asm92
-rw-r--r--rts/gmp/mpn/x86/udiv.asm44
-rw-r--r--rts/gmp/mpn/x86/umul.asm43
-rw-r--r--rts/gmp/mpn/x86/x86-defs.m4713
-rw-r--r--rts/gmp/mpn/z8000/add_n.s53
-rw-r--r--rts/gmp/mpn/z8000/gmp-mparam.h27
-rw-r--r--rts/gmp/mpn/z8000/mul_1.s68
-rw-r--r--rts/gmp/mpn/z8000/sub_n.s54
-rw-r--r--rts/gmp/mpn/z8000x/add_n.s56
-rw-r--r--rts/gmp/mpn/z8000x/sub_n.s56
-rw-r--r--rts/gmp/mpz/Makefile.am58
-rw-r--r--rts/gmp/mpz/Makefile.in457
-rw-r--r--rts/gmp/mpz/README23
-rw-r--r--rts/gmp/mpz/abs.c51
-rw-r--r--rts/gmp/mpz/add.c123
-rw-r--r--rts/gmp/mpz/add_ui.c84
-rw-r--r--rts/gmp/mpz/addmul_ui.c214
-rw-r--r--rts/gmp/mpz/and.c278
-rw-r--r--rts/gmp/mpz/array_init.c48
-rw-r--r--rts/gmp/mpz/bin_ui.c141
-rw-r--r--rts/gmp/mpz/bin_uiui.c120
-rw-r--r--rts/gmp/mpz/cdiv_q.c51
-rw-r--r--rts/gmp/mpz/cdiv_q_ui.c67
-rw-r--r--rts/gmp/mpz/cdiv_qr.c64
-rw-r--r--rts/gmp/mpz/cdiv_qr_ui.c71
-rw-r--r--rts/gmp/mpz/cdiv_r.c59
-rw-r--r--rts/gmp/mpz/cdiv_r_ui.c57
-rw-r--r--rts/gmp/mpz/cdiv_ui.c50
-rw-r--r--rts/gmp/mpz/clear.c35
-rw-r--r--rts/gmp/mpz/clrbit.c114
-rw-r--r--rts/gmp/mpz/cmp.c75
-rw-r--r--rts/gmp/mpz/cmp_si.c64
-rw-r--r--rts/gmp/mpz/cmp_ui.c53
-rw-r--r--rts/gmp/mpz/cmpabs.c57
-rw-r--r--rts/gmp/mpz/cmpabs_ui.c56
-rw-r--r--rts/gmp/mpz/com.c93
-rw-r--r--rts/gmp/mpz/divexact.c125
-rw-r--r--rts/gmp/mpz/dump.c44
-rw-r--r--rts/gmp/mpz/fac_ui.c157
-rw-r--r--rts/gmp/mpz/fdiv_q.c51
-rw-r--r--rts/gmp/mpz/fdiv_q_2exp.c104
-rw-r--r--rts/gmp/mpz/fdiv_q_ui.c65
-rw-r--r--rts/gmp/mpz/fdiv_qr.c64
-rw-r--r--rts/gmp/mpz/fdiv_qr_ui.c69
-rw-r--r--rts/gmp/mpz/fdiv_r.c58
-rw-r--r--rts/gmp/mpz/fdiv_r_2exp.c156
-rw-r--r--rts/gmp/mpz/fdiv_r_ui.c55
-rw-r--r--rts/gmp/mpz/fdiv_ui.c48
-rw-r--r--rts/gmp/mpz/fib_ui.c165
-rw-r--r--rts/gmp/mpz/fits_sint_p.c50
-rw-r--r--rts/gmp/mpz/fits_slong_p.c50
-rw-r--r--rts/gmp/mpz/fits_sshort_p.c50
-rw-r--r--rts/gmp/mpz/fits_uint_p.c41
-rw-r--r--rts/gmp/mpz/fits_ulong_p.c41
-rw-r--r--rts/gmp/mpz/fits_ushort_p.c41
-rw-r--r--rts/gmp/mpz/gcd.c180
-rw-r--r--rts/gmp/mpz/gcd_ui.c65
-rw-r--r--rts/gmp/mpz/gcdext.c137
-rw-r--r--rts/gmp/mpz/get_d.c128
-rw-r--r--rts/gmp/mpz/get_si.c43
-rw-r--r--rts/gmp/mpz/get_str.c118
-rw-r--r--rts/gmp/mpz/get_ui.c37
-rw-r--r--rts/gmp/mpz/getlimbn.c38
-rw-r--r--rts/gmp/mpz/hamdist.c62
-rw-r--r--rts/gmp/mpz/init.c36
-rw-r--r--rts/gmp/mpz/inp_raw.c101
-rw-r--r--rts/gmp/mpz/inp_str.c167
-rw-r--r--rts/gmp/mpz/invert.c77
-rw-r--r--rts/gmp/mpz/ior.c244
-rw-r--r--rts/gmp/mpz/iset.c49
-rw-r--r--rts/gmp/mpz/iset_d.c39
-rw-r--r--rts/gmp/mpz/iset_si.c49
-rw-r--r--rts/gmp/mpz/iset_str.c47
-rw-r--r--rts/gmp/mpz/iset_ui.c39
-rw-r--r--rts/gmp/mpz/jacobi.c53
-rw-r--r--rts/gmp/mpz/kronsz.c126
-rw-r--r--rts/gmp/mpz/kronuz.c115
-rw-r--r--rts/gmp/mpz/kronzs.c74
-rw-r--r--rts/gmp/mpz/kronzu.c66
-rw-r--r--rts/gmp/mpz/lcm.c61
-rw-r--r--rts/gmp/mpz/legendre.c184
-rw-r--r--rts/gmp/mpz/mod.c63
-rw-r--r--rts/gmp/mpz/mul.c131
-rw-r--r--rts/gmp/mpz/mul_2exp.c76
-rw-r--r--rts/gmp/mpz/mul_siui.c81
-rw-r--r--rts/gmp/mpz/neg.c53
-rw-r--r--rts/gmp/mpz/nextprime.c120
-rw-r--r--rts/gmp/mpz/out_raw.c89
-rw-r--r--rts/gmp/mpz/out_str.c108
-rw-r--r--rts/gmp/mpz/perfpow.c272
-rw-r--r--rts/gmp/mpz/perfsqr.c45
-rw-r--r--rts/gmp/mpz/popcount.c42
-rw-r--r--rts/gmp/mpz/pow_ui.c129
-rw-r--r--rts/gmp/mpz/powm.c364
-rw-r--r--rts/gmp/mpz/powm_ui.c248
-rw-r--r--rts/gmp/mpz/pprime_p.c242
-rw-r--r--rts/gmp/mpz/random.c56
-rw-r--r--rts/gmp/mpz/random2.c48
-rw-r--r--rts/gmp/mpz/realloc.c52
-rw-r--r--rts/gmp/mpz/remove.c93
-rw-r--r--rts/gmp/mpz/root.c183
-rw-r--r--rts/gmp/mpz/rrandomb.c117
-rw-r--r--rts/gmp/mpz/scan0.c35
-rw-r--r--rts/gmp/mpz/scan1.c35
-rw-r--r--rts/gmp/mpz/set.c48
-rw-r--r--rts/gmp/mpz/set_d.c96
-rw-r--r--rts/gmp/mpz/set_f.c64
-rw-r--r--rts/gmp/mpz/set_q.c36
-rw-r--r--rts/gmp/mpz/set_si.c48
-rw-r--r--rts/gmp/mpz/set_str.c157
-rw-r--r--rts/gmp/mpz/set_ui.c43
-rw-r--r--rts/gmp/mpz/setbit.c119
-rw-r--r--rts/gmp/mpz/size.c35
-rw-r--r--rts/gmp/mpz/sizeinbase.c60
-rw-r--r--rts/gmp/mpz/sqrt.c86
-rw-r--r--rts/gmp/mpz/sqrtrem.c111
-rw-r--r--rts/gmp/mpz/sub.c123
-rw-r--r--rts/gmp/mpz/sub_ui.c84
-rw-r--r--rts/gmp/mpz/swap.c52
-rw-r--r--rts/gmp/mpz/tdiv_q.c91
-rw-r--r--rts/gmp/mpz/tdiv_q_2exp.c68
-rw-r--r--rts/gmp/mpz/tdiv_q_ui.c64
-rw-r--r--rts/gmp/mpz/tdiv_qr.c130
-rw-r--r--rts/gmp/mpz/tdiv_qr_ui.c76
-rw-r--r--rts/gmp/mpz/tdiv_r.c98
-rw-r--r--rts/gmp/mpz/tdiv_r_2exp.c79
-rw-r--r--rts/gmp/mpz/tdiv_r_ui.c63
-rw-r--r--rts/gmp/mpz/tdiv_ui.c53
-rw-r--r--rts/gmp/mpz/tstbit.c70
-rw-r--r--rts/gmp/mpz/ui_pow_ui.c139
-rw-r--r--rts/gmp/mpz/urandomb.c49
-rw-r--r--rts/gmp/mpz/urandomm.c78
-rw-r--r--rts/gmp/mpz/xor.c217
-rw-r--r--rts/gmp/rand.c171
-rw-r--r--rts/gmp/randclr.c54
-rw-r--r--rts/gmp/randlc.c56
-rw-r--r--rts/gmp/randlc2x.c59
-rw-r--r--rts/gmp/randraw.c360
-rw-r--r--rts/gmp/randsd.c37
-rw-r--r--rts/gmp/randsdui.c37
-rw-r--r--rts/gmp/stack-alloc.c136
-rw-r--r--rts/gmp/stack-alloc.h64
-rw-r--r--rts/gmp/stamp-h.in1
-rw-r--r--rts/gmp/stamp-vti3
-rw-r--r--rts/gmp/urandom.h86
-rw-r--r--rts/gmp/version.c26
-rw-r--r--rts/gmp/version.texi3
524 files changed, 90586 insertions, 0 deletions
diff --git a/rts/gmp/.gdbinit b/rts/gmp/.gdbinit
new file mode 100644
index 0000000000..843c109e89
--- /dev/null
+++ b/rts/gmp/.gdbinit
@@ -0,0 +1,34 @@
+# Copyright (C) 1999 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+define pz
+set __gmpz_dump ($)
+end
+
+define pq
+set __gmpz_dump ($->_mp_num)
+echo /
+set __gmpz_dump ($->_mp_den)
+end
+
+define pf
+set __gmpf_dump ($)
+end
+
diff --git a/rts/gmp/AUTHORS b/rts/gmp/AUTHORS
new file mode 100644
index 0000000000..1fa057af6c
--- /dev/null
+++ b/rts/gmp/AUTHORS
@@ -0,0 +1,12 @@
+Authors if GNU MP (in chronological order)
+Torbjörn Granlund
+John Amanatides
+Paul Zimmermann
+Ken Weber
+Bennet Yee
+Andreas Schwab
+Robert Harley
+Linus Nordberg
+Kent Boortz
+Kevin Ryde
+Guillaume Hanrot
diff --git a/rts/gmp/COPYING b/rts/gmp/COPYING
new file mode 100644
index 0000000000..a6d7d0188a
--- /dev/null
+++ b/rts/gmp/COPYING
@@ -0,0 +1,336 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Hereny it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Section
diff --git a/rts/gmp/COPYING.LIB b/rts/gmp/COPYING.LIB
new file mode 100644
index 0000000000..c4792dd27a
--- /dev/null
+++ b/rts/gmp/COPYING.LIB
@@ -0,0 +1,515 @@
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+^L
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+^L
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+^L
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+^L
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+^L
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+^L
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+^L
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+^L
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it
+does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper
+mail.
+
+You should also get your employer (if you work as a programmer) or
+your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/rts/gmp/INSTALL b/rts/gmp/INSTALL
new file mode 100644
index 0000000000..62faa1a2e3
--- /dev/null
+++ b/rts/gmp/INSTALL
@@ -0,0 +1,146 @@
+
+ INSTALLING GNU MP
+ =================
+
+
+These instructions are only for the impatient. Others should read the install
+instructions in the manual, gmp.info. Use
+
+ info -f ./gmp.info
+
+or in emacs
+
+ C-u C-h i gmp.info
+
+
+Here are some brief instructions on how to install GMP, and some examples to
+help you get started using GMP.
+
+First, you need to compile, and optionally install, GMP. Since you're
+impatient, try this:
+
+ ./configure; make
+
+If that fails, or you care about the performance of GMP, you need to read the
+full instructions in the chapter "Installing GMP", in the manual.
+
+Next, try some small test programs, for example the ones below.
+
+In GMP programs, all variables need to be initialized before they are
+assigned, and cleared out before program flow leaves the scope in which they
+were declared. Here is an example program that reads two numbers from the
+command line, multiplies them, and prints the result to stdout.
+
+
+ #include <stdio.h>
+ #include <gmp.h> /* All GMP programs need to include gmp.h */
+
+ main (int argc, char **argv)
+ {
+ mpz_t a, b, p;
+
+ if (argc != 3)
+ { printf ("Usage: %s <number> <number>\n", argv[0]); exit (1); }
+
+ /* Initialize variables */
+ mpz_init (a);
+ mpz_init (b);
+ mpz_init (p);
+
+ /* Assign a and b from base 10 strings in argv */
+ mpz_set_str (a, argv[1], 10);
+ mpz_set_str (b, argv[2], 10);
+
+ /* Multiply a and b and put the result in p */
+ mpz_mul (p, a, b);
+
+ /* Print p in base 10 */
+ mpz_out_str (stdout, 10, p);
+ fputc ('\n', stdout);
+
+ /* Clear out variables */
+ mpz_clear (a);
+ mpz_clear (b);
+ mpz_clear (p);
+ exit (0);
+ }
+
+
+This might look tedious, with all the initializing and clearing. Fortunately
+some of these operations can be combined, and other operations can often be
+avoided. An experienced GMP user might write:
+
+
+ #include <stdio.h>
+ #include <gmp.h>
+
+ main (int argc, char **argv)
+ {
+ mpz_t a, b, p;
+
+ if (argc != 3)
+ { printf ("Usage: %s <number> <number>\n", argv[0]); exit (1); }
+
+ /* Initialize and assign a and b from base 10 strings in argv */
+ mpz_init_set_str (a, argv[1], 10);
+ mpz_init_set_str (b, argv[2], 10);
+ /* Initialize p */
+ mpz_init (p);
+
+ /* Multiply a and b and put the result in p */
+ mpz_mul (p, a, b);
+
+ /* Print p in base 10 */
+ mpz_out_str (stdout, 10, p);
+ fputc ('\n', stdout);
+
+ /* Since we're about to exit, no need to clear out variables */
+ exit (0);
+ }
+
+
+Now you have to compile your test program, and link it with the GMP library.
+Assuming your working directory is still the gmp source directory, and your
+source file is called example.c, enter:
+
+ gcc -g -I. example.c .libs/libgmp.a
+
+After installing, the command becomes: "gcc -g example.c -lgmp". Also, GMP is
+libtool based so you can use that to link if you want.
+
+Now try to run the example:
+
+ ./a.out 98365871231256752134 319378318340103345227
+ 31415926535897932384618573336104570964418
+
+The functions used here all operate on signed integers, and have names
+starting with "mpz_". There are many more such functions than used in these
+examples. See the chapter "Integer Functions" in the manual, for a complete
+list.
+
+There are two other main classes of functions in GMP. They operate on
+rational numbers and floating-point numbers, respectively. The chapters
+"Rational Number Functions", and "Floating-point Functions" document these
+classes.
+
+To run a set of tests, do "make check". This will take a while.
+
+To create the printable documentation from the texinfo source, type "make
+gmp.dvi" or "make gmp.ps". This requires various "tex" commands.
+
+To install the library, do "make install" (then you can use -lgmp instead of
+.libs/libgmp.a).
+
+If you decide to use GMP, it is a good idea you at least read the chapter "GMP
+Basics" in the manual.
+
+Some known build problems are noted in the "Installing GMP" chapter of
+the manual. Please report other problems to bug-gmp@gnu.org.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 78
+End:
diff --git a/rts/gmp/Makefile.am b/rts/gmp/Makefile.am
new file mode 100644
index 0000000000..b73b805c6e
--- /dev/null
+++ b/rts/gmp/Makefile.am
@@ -0,0 +1,197 @@
+## Process this file with automake to generate Makefile.in
+
+
+# Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# make check
+#
+# It'd be good if "make check" first did a "make all" or whatever to
+# build libgmp.la, but it's not clear how best to do this. Putting a
+# "check:" target is overridden by automake, and a "check-local:" runs
+# too late (due to depth-first subdirectory processing). For now it's
+# necessary to do "make && make check".
+#
+# MPF_OBJECTS etc
+#
+# Libtool needs all the .lo files passed to it if it's going to build
+# both a static and shared library. If a convenience library like
+# mpf/libmpf.la is passed then the resulting libgmp.a gets the PIC .lo
+# objects rather than the non-PIC .o's.
+#
+# Unfortunately this leads to the big lists of objects below. Something
+# like mpz/*.lo would probably work, but might risk missing something
+# out or getting something extra. The source files for each .lo are
+# listed in the Makefile.am's in the subdirectories.
+
+
+# Libtool -version-info for libgmp.la and libmp.la. See (libtool)Versioning
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+# AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+# CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp and libmp, only do it just before a release.
+#
+# GMP -version-info
+# release libgmp libmp
+# 2.0.x - -
+# 3.0 3:0:0 3:0:0
+# 3.0.1 3:1:0 3:0:0
+# 3.1 4:0:1 4:0:1
+# 3.1.1 4:1:1 4:1:1
+#
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which is what has been used on
+# Debian GNU/Linux packages of gmp 2. Pretend gmp 2 was 2:0:0, so the
+# interface changes for gmp 3 mean 3:0:0 is right.
+
+LIBGMP_LT_CURRENT = 4
+LIBGMP_LT_REVISION = 1
+LIBGMP_LT_AGE = 1
+
+LIBMP_LT_CURRENT = 4
+LIBMP_LT_REVISION = 1
+LIBMP_LT_AGE = 1
+
+
+AUTOMAKE_OPTIONS = gnu check-news no-dependencies ansi2knr
+
+SUBDIRS = mpn mpz mpq mpf mpbsd mpfr tests demos tune
+
+include_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION) $(MPFR_HEADERS_OPTION)
+EXTRA_HEADERS = mp.h
+
+lib_LTLIBRARIES = libgmp.la $(MPBSD_LTLIBRARIES_OPTION)
+
+EXTRA_DIST = .gdbinit gmp-impl.h longlong.h stack-alloc.h urandom.h doc macos
+
+DISTCLEANFILES = asm-syntax.h config.m4 @gmp_srclinks@
+
+
+MPF_OBJECTS = mpf/init.lo mpf/init2.lo mpf/set.lo mpf/set_ui.lo mpf/set_si.lo \
+ mpf/set_str.lo mpf/set_d.lo mpf/set_z.lo mpf/iset.lo mpf/iset_ui.lo \
+ mpf/iset_si.lo mpf/iset_str.lo mpf/iset_d.lo mpf/clear.lo mpf/get_str.lo \
+ mpf/dump.lo mpf/size.lo mpf/eq.lo mpf/reldiff.lo mpf/sqrt.lo mpf/random2.lo \
+ mpf/inp_str.lo mpf/out_str.lo mpf/add.lo mpf/add_ui.lo mpf/sub.lo \
+ mpf/sub_ui.lo mpf/ui_sub.lo mpf/mul.lo mpf/mul_ui.lo mpf/div.lo \
+ mpf/div_ui.lo mpf/cmp.lo mpf/cmp_ui.lo mpf/cmp_si.lo mpf/mul_2exp.lo \
+ mpf/div_2exp.lo mpf/abs.lo mpf/neg.lo mpf/set_q.lo mpf/get_d.lo \
+ mpf/set_dfl_prec.lo mpf/set_prc.lo mpf/set_prc_raw.lo mpf/get_prc.lo \
+ mpf/ui_div.lo mpf/sqrt_ui.lo mpf/floor.lo mpf/ceil.lo mpf/trunc.lo \
+ mpf/pow_ui.lo mpf/urandomb.lo mpf/swap.lo
+MPZ_OBJECTS = mpz/abs.lo mpz/add.lo mpz/add_ui.lo mpz/addmul_ui.lo mpz/and.lo \
+ mpz/array_init.lo mpz/bin_ui.lo mpz/bin_uiui.lo mpz/cdiv_q.lo \
+ mpz/cdiv_q_ui.lo mpz/cdiv_qr.lo mpz/cdiv_qr_ui.lo mpz/cdiv_r.lo \
+ mpz/cdiv_r_ui.lo mpz/cdiv_ui.lo mpz/clear.lo mpz/clrbit.lo mpz/cmp.lo \
+ mpz/cmp_si.lo mpz/cmp_ui.lo mpz/cmpabs.lo mpz/cmpabs_ui.lo mpz/com.lo \
+ mpz/divexact.lo mpz/dump.lo mpz/fac_ui.lo mpz/fdiv_q.lo mpz/fdiv_q_2exp.lo \
+ mpz/fdiv_q_ui.lo mpz/fdiv_qr.lo mpz/fdiv_qr_ui.lo mpz/fdiv_r.lo \
+ mpz/fdiv_r_2exp.lo mpz/fdiv_r_ui.lo mpz/fdiv_ui.lo mpz/fib_ui.lo \
+ mpz/fits_sint_p.lo mpz/fits_slong_p.lo mpz/fits_sshort_p.lo \
+ mpz/fits_uint_p.lo mpz/fits_ulong_p.lo mpz/fits_ushort_p.lo mpz/gcd.lo \
+ mpz/gcd_ui.lo mpz/gcdext.lo mpz/get_d.lo mpz/get_si.lo mpz/get_str.lo \
+ mpz/get_ui.lo mpz/getlimbn.lo mpz/hamdist.lo mpz/init.lo mpz/inp_raw.lo \
+ mpz/inp_str.lo mpz/invert.lo mpz/ior.lo mpz/iset.lo mpz/iset_d.lo \
+ mpz/iset_si.lo mpz/iset_str.lo mpz/iset_ui.lo mpz/jacobi.lo \
+ mpz/kronsz.lo mpz/kronuz.lo mpz/kronzs.lo mpz/kronzu.lo \
+ mpz/lcm.lo mpz/legendre.lo \
+ mpz/mod.lo mpz/mul.lo mpz/mul_2exp.lo mpz/mul_si.lo mpz/mul_ui.lo \
+ mpz/neg.lo mpz/nextprime.lo mpz/out_raw.lo mpz/out_str.lo mpz/perfpow.lo mpz/perfsqr.lo \
+ mpz/popcount.lo mpz/pow_ui.lo mpz/powm.lo mpz/powm_ui.lo mpz/pprime_p.lo \
+ mpz/random.lo mpz/random2.lo mpz/realloc.lo mpz/remove.lo mpz/root.lo \
+ mpz/rrandomb.lo \
+ mpz/scan0.lo mpz/scan1.lo mpz/set.lo mpz/set_d.lo mpz/set_f.lo mpz/set_q.lo \
+ mpz/set_si.lo mpz/set_str.lo mpz/set_ui.lo mpz/setbit.lo mpz/size.lo \
+ mpz/sizeinbase.lo mpz/sqrt.lo mpz/sqrtrem.lo mpz/sub.lo mpz/sub_ui.lo \
+ mpz/swap.lo mpz/tdiv_ui.lo mpz/tdiv_q.lo mpz/tdiv_q_2exp.lo mpz/tdiv_q_ui.lo \
+ mpz/tdiv_qr.lo mpz/tdiv_qr_ui.lo mpz/tdiv_r.lo mpz/tdiv_r_2exp.lo \
+ mpz/tdiv_r_ui.lo mpz/tstbit.lo mpz/ui_pow_ui.lo mpz/urandomb.lo \
+ mpz/urandomm.lo mpz/xor.lo
+MPQ_OBJECTS = mpq/add.lo mpq/canonicalize.lo mpq/clear.lo mpq/cmp.lo \
+ mpq/cmp_ui.lo mpq/div.lo mpq/get_d.lo mpq/get_den.lo mpq/get_num.lo \
+ mpq/init.lo mpq/inv.lo mpq/mul.lo mpq/neg.lo mpq/out_str.lo \
+ mpq/set.lo mpq/set_den.lo \
+ mpq/set_num.lo mpq/set_si.lo mpq/set_ui.lo mpq/sub.lo mpq/equal.lo \
+ mpq/set_z.lo mpq/set_d.lo mpq/swap.lo
+MPN_OBJECTS = @mpn_objs_in_libgmp@
+
+MPBSD_OBJECTS = mpbsd/add.lo mpbsd/tdiv_qr.lo mpbsd/move.lo mpbsd/powm.lo \
+ mpbsd/sub.lo mpbsd/cmp.lo mpbsd/mfree.lo mpbsd/mtox.lo mpbsd/realloc.lo \
+ mpbsd/gcd.lo mpbsd/itom.lo mpbsd/min.lo mpbsd/mul.lo mpbsd/mout.lo \
+ mpbsd/pow_ui.lo mpbsd/sdiv.lo mpbsd/sqrtrem.lo mpbsd/xtom.lo
+
+# FIXME: Add mpfr/rnd_mode.lo when it's clean.
+MPFR_OBJECTS = mpfr/add.lo mpfr/div_2exp.lo mpfr/neg.lo mpfr/set_dfl_prec.lo \
+ mpfr/set_str_raw.lo mpfr/agm.lo mpfr/get_str.lo mpfr/print_raw.lo \
+ mpfr/set_dfl_rnd.lo mpfr/sqrt.lo mpfr/clear.lo mpfr/init.lo \
+ mpfr/set_f.lo mpfr/sub.lo mpfr/cmp.lo mpfr/mul.lo mpfr/round.lo \
+ mpfr/set_prec.lo mpfr/cmp_ui.lo mpfr/mul_2exp.lo mpfr/set.lo mpfr/set_si.lo \
+ mpfr/div.lo mpfr/mul_ui.lo mpfr/set_d.lo mpfr/pow.lo mpfr/out_str.lo \
+ mpfr/pi.lo mpfr/set_z.lo mpfr/add_ulp.lo mpfr/log2.lo mpfr/random.lo \
+ mpfr/log.lo mpfr/exp.lo mpfr/div_ui.lo mpfr/zeta.lo mpfr/karadiv.lo \
+ mpfr/karasqrt.lo mpfr/print_rnd_mode.lo
+
+
+if WANT_MPFR
+MPFR_HEADERS_OPTION = mpfr/mpfr.h
+MPFR_OBJECTS_OPTION = $(MPFR_OBJECTS)
+MPFR_LIBADD_OPTION = -lm
+endif
+libgmp_la_SOURCES = assert.c compat.c errno.c memory.c mp_set_fns.c \
+ mp_clz_tab.c mp_minv_tab.c \
+ rand.c randclr.c randlc.c randlc2x.c randraw.c randsd.c \
+ randsdui.c version.c stack-alloc.c mp_bpl.c extract-dbl.c insert-dbl.c
+libgmp_la_DEPENDENCIES = \
+ $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPN_OBJECTS) $(MPQ_OBJECTS) \
+ $(MPFR_OBJECTS_OPTION)
+libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES) $(MPFR_LIBADD_OPTION)
+libgmp_la_LDFLAGS = \
+ -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
+
+
+if WANT_MPBSD
+MPBSD_HEADERS_OPTION = mp.h
+MPBSD_LTLIBRARIES_OPTION = libmp.la
+endif
+libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \
+ mp_minv_tab.c mp_set_fns.c stack-alloc.c
+libmp_la_DEPENDENCIES = $(MPBSD_OBJECTS) $(MPN_OBJECTS) \
+ mpz/add.lo mpz/clear.lo mpz/cmp.lo mpz/init.lo mpz/mod.lo mpz/mul.lo \
+ mpz/mul_2exp.lo mpz/realloc.lo mpz/set.lo mpz/set_ui.lo mpz/tdiv_r.lo \
+ mpz/sub.lo
+libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
+libmp_la_LDFLAGS = \
+ -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)
+
+
+info_TEXINFOS = gmp.texi
+
+
+# Don't ship CVS directories or emacs backups.
+dist-hook:
+ -find $(distdir) \( -name CVS -type d \) -o -name "*.~*" \
+ | xargs rm -rf
diff --git a/rts/gmp/Makefile.in b/rts/gmp/Makefile.in
new file mode 100644
index 0000000000..e63383e7a7
--- /dev/null
+++ b/rts/gmp/Makefile.in
@@ -0,0 +1,932 @@
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = .
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+
+@SET_MAKE@
+build_alias = @build_alias@
+build_triplet = @build@
+host_alias = @host_alias@
+host_triplet = @host@
+target_alias = @target_alias@
+target_triplet = @target@
+AMDEP = @AMDEP@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+EXEEXT = @EXEEXT@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+gmp_srclinks = @gmp_srclinks@
+install_sh = @install_sh@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+
+# Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+# make check
+#
+# It'd be good if "make check" first did a "make all" or whatever to
+# build libgmp.la, but it's not clear how best to do this. Putting a
+# "check:" target is overridden by automake, and a "check-local:" runs
+# too late (due to depth-first subdirectory processing). For now it's
+# necessary to do "make && make check".
+#
+# MPF_OBJECTS etc
+#
+# Libtool needs all the .lo files passed to it if it's going to build
+# both a static and shared library. If a convenience library like
+# mpf/libmpf.la is passed then the resulting libgmp.a gets the PIC .lo
+# objects rather than the non-PIC .o's.
+#
+# Unfortunately this leads to the big lists of objects below. Something
+# like mpz/*.lo would probably work, but might risk missing something
+# out or getting something extra. The source files for each .lo are
+# listed in the Makefile.am's in the subdirectories.
+
+# Libtool -version-info for libgmp.la and libmp.la. See (libtool)Versioning
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+# AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+# CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp and libmp, only do it just before a release.
+#
+# GMP -version-info
+# release libgmp libmp
+# 2.0.x - -
+# 3.0 3:0:0 3:0:0
+# 3.0.1 3:1:0 3:0:0
+# 3.1 4:0:1 4:0:1
+# 3.1.1 4:1:1 4:1:1
+#
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which is what has been used on
+# Debian GNU/Linux packages of gmp 2. Pretend gmp 2 was 2:0:0, so the
+# interface changes for gmp 3 mean 3:0:0 is right.
+
+
+LIBGMP_LT_CURRENT = 4
+LIBGMP_LT_REVISION = 1
+LIBGMP_LT_AGE = 1
+
+LIBMP_LT_CURRENT = 4
+LIBMP_LT_REVISION = 1
+LIBMP_LT_AGE = 1
+
+AUTOMAKE_OPTIONS = gnu check-news no-dependencies ansi2knr
+
+SUBDIRS = mpn mpz
+
+include_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION) $(MPFR_HEADERS_OPTION)
+EXTRA_HEADERS = mp.h
+
+lib_LTLIBRARIES = libgmp.la $(MPBSD_LTLIBRARIES_OPTION)
+
+EXTRA_DIST = .gdbinit gmp-impl.h longlong.h stack-alloc.h urandom.h doc macos
+
+DISTCLEANFILES = asm-syntax.h config.m4 @gmp_srclinks@
+
+MPZ_OBJECTS = mpz/abs.lo mpz/add.lo mpz/add_ui.lo mpz/addmul_ui.lo mpz/and.lo \
+ mpz/array_init.lo mpz/bin_ui.lo mpz/bin_uiui.lo mpz/cdiv_q.lo \
+ mpz/cdiv_q_ui.lo mpz/cdiv_qr.lo mpz/cdiv_qr_ui.lo mpz/cdiv_r.lo \
+ mpz/cdiv_r_ui.lo mpz/cdiv_ui.lo mpz/clear.lo mpz/clrbit.lo mpz/cmp.lo \
+ mpz/cmp_si.lo mpz/cmp_ui.lo mpz/cmpabs.lo mpz/cmpabs_ui.lo mpz/com.lo \
+ mpz/divexact.lo mpz/dump.lo mpz/fac_ui.lo mpz/fdiv_q.lo mpz/fdiv_q_2exp.lo \
+ mpz/fdiv_q_ui.lo mpz/fdiv_qr.lo mpz/fdiv_qr_ui.lo mpz/fdiv_r.lo \
+ mpz/fdiv_r_2exp.lo mpz/fdiv_r_ui.lo mpz/fdiv_ui.lo mpz/fib_ui.lo \
+ mpz/fits_sint_p.lo mpz/fits_slong_p.lo mpz/fits_sshort_p.lo \
+ mpz/fits_uint_p.lo mpz/fits_ulong_p.lo mpz/fits_ushort_p.lo mpz/gcd.lo \
+ mpz/gcd_ui.lo mpz/gcdext.lo mpz/get_d.lo mpz/get_si.lo mpz/get_str.lo \
+ mpz/get_ui.lo mpz/getlimbn.lo mpz/hamdist.lo mpz/init.lo mpz/inp_raw.lo \
+ mpz/inp_str.lo mpz/invert.lo mpz/ior.lo mpz/iset.lo mpz/iset_d.lo \
+ mpz/iset_si.lo mpz/iset_str.lo mpz/iset_ui.lo mpz/jacobi.lo \
+ mpz/kronsz.lo mpz/kronuz.lo mpz/kronzs.lo mpz/kronzu.lo \
+ mpz/lcm.lo mpz/legendre.lo \
+ mpz/mod.lo mpz/mul.lo mpz/mul_2exp.lo mpz/mul_si.lo mpz/mul_ui.lo \
+ mpz/neg.lo mpz/nextprime.lo mpz/out_raw.lo mpz/out_str.lo mpz/perfpow.lo mpz/perfsqr.lo \
+ mpz/popcount.lo mpz/pow_ui.lo mpz/powm.lo mpz/powm_ui.lo mpz/pprime_p.lo \
+ mpz/random.lo mpz/random2.lo mpz/realloc.lo mpz/remove.lo mpz/root.lo \
+ mpz/rrandomb.lo \
+ mpz/scan0.lo mpz/scan1.lo mpz/set.lo mpz/set_d.lo mpz/set_f.lo mpz/set_q.lo \
+ mpz/set_si.lo mpz/set_str.lo mpz/set_ui.lo mpz/setbit.lo mpz/size.lo \
+ mpz/sizeinbase.lo mpz/sqrt.lo mpz/sqrtrem.lo mpz/sub.lo mpz/sub_ui.lo \
+ mpz/swap.lo mpz/tdiv_ui.lo mpz/tdiv_q.lo mpz/tdiv_q_2exp.lo mpz/tdiv_q_ui.lo \
+ mpz/tdiv_qr.lo mpz/tdiv_qr_ui.lo mpz/tdiv_r.lo mpz/tdiv_r_2exp.lo \
+ mpz/tdiv_r_ui.lo mpz/tstbit.lo mpz/ui_pow_ui.lo mpz/urandomb.lo \
+ mpz/urandomm.lo mpz/xor.lo
+
+MPN_OBJECTS = @mpn_objs_in_libgmp@
+
+MPBSD_OBJECTS = mpbsd/add.lo mpbsd/tdiv_qr.lo mpbsd/move.lo mpbsd/powm.lo \
+ mpbsd/sub.lo mpbsd/cmp.lo mpbsd/mfree.lo mpbsd/mtox.lo mpbsd/realloc.lo \
+ mpbsd/gcd.lo mpbsd/itom.lo mpbsd/min.lo mpbsd/mul.lo mpbsd/mout.lo \
+ mpbsd/pow_ui.lo mpbsd/sdiv.lo mpbsd/sqrtrem.lo mpbsd/xtom.lo
+
+
+
+@WANT_MPFR_TRUE@MPFR_HEADERS_OPTION = @WANT_MPFR_TRUE@mpfr/mpfr.h
+@WANT_MPFR_TRUE@MPFR_OBJECTS_OPTION = @WANT_MPFR_TRUE@$(MPFR_OBJECTS)
+@WANT_MPFR_TRUE@MPFR_LIBADD_OPTION = @WANT_MPFR_TRUE@-lm
+libgmp_la_SOURCES = assert.c compat.c errno.c memory.c mp_set_fns.c \
+ mp_clz_tab.c mp_minv_tab.c \
+ version.c stack-alloc.c mp_bpl.c extract-dbl.c insert-dbl.c
+
+libgmp_la_DEPENDENCIES = \
+ $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPN_OBJECTS) $(MPQ_OBJECTS) \
+ $(MPFR_OBJECTS_OPTION)
+
+libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES) $(MPFR_LIBADD_OPTION)
+libgmp_la_LDFLAGS = \
+ -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
+
+
+@WANT_MPBSD_TRUE@MPBSD_HEADERS_OPTION = @WANT_MPBSD_TRUE@mp.h
+@WANT_MPBSD_TRUE@MPBSD_LTLIBRARIES_OPTION = @WANT_MPBSD_TRUE@libmp.la
+libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \
+ mp_minv_tab.c mp_set_fns.c stack-alloc.c
+
+libmp_la_DEPENDENCIES = $(MPBSD_OBJECTS) $(MPN_OBJECTS) \
+ mpz/add.lo mpz/clear.lo mpz/cmp.lo mpz/init.lo mpz/mod.lo mpz/mul.lo \
+ mpz/mul_2exp.lo mpz/realloc.lo mpz/set.lo mpz/set_ui.lo mpz/tdiv_r.lo \
+ mpz/sub.lo
+
+libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
+libmp_la_LDFLAGS = \
+ -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)
+
+
+info_TEXINFOS = gmp.texi
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I.
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+ANSI2KNR = @ANSI2KNR@
+am_libgmp_la_OBJECTS = assert$U.lo compat$U.lo errno$U.lo memory$U.lo \
+mp_set_fns$U.lo mp_clz_tab$U.lo mp_minv_tab$U.lo rand$U.lo randclr$U.lo \
+randlc$U.lo randlc2x$U.lo randraw$U.lo randsd$U.lo randsdui$U.lo \
+version$U.lo stack-alloc$U.lo mp_bpl$U.lo extract-dbl$U.lo \
+insert-dbl$U.lo
+libgmp_la_OBJECTS = $(am_libgmp_la_OBJECTS)
+am_libmp_la_OBJECTS = assert$U.lo errno$U.lo memory$U.lo mp_bpl$U.lo \
+mp_clz_tab$U.lo mp_minv_tab$U.lo mp_set_fns$U.lo stack-alloc$U.lo
+libmp_la_OBJECTS = $(am_libmp_la_OBJECTS)
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CFLAGS = @CFLAGS@
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libgmp_la_SOURCES) $(libmp_la_SOURCES)
+TEXI2DVI = texi2dvi
+# INFO_DEPS = gmp.info
+DVIS = gmp.dvi
+TEXINFOS = gmp.texi
+HEADERS = $(include_HEADERS)
+
+DIST_COMMON = README $(EXTRA_HEADERS) $(include_HEADERS) ./stamp-h.in \
+AUTHORS COPYING COPYING.LIB ChangeLog INSTALL Makefile.am Makefile.in \
+NEWS acconfig.h acinclude.m4 aclocal.m4 ansi2knr.1 ansi2knr.c \
+config.guess config.in config.sub configure configure.in depcomp \
+install-sh ltconfig ltmain.sh mdate-sh missing mkinstalldirs stamp-vti \
+texinfo.tex version.texi
+
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+GZIP_ENV = --best
+depcomp =
+SOURCES = $(libgmp_la_SOURCES) $(libmp_la_SOURCES)
+OBJECTS = $(am_libgmp_la_OBJECTS) $(am_libmp_la_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .c .dvi .info .lo .o .obj .ps .texi .texinfo .txi
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ configure.in acinclude.m4
+ cd $(srcdir) && $(ACLOCAL)
+
+config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ $(SHELL) ./config.status --recheck
+$(srcdir)/configure: @MAINTAINER_MODE_TRUE@$(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)
+ cd $(srcdir) && $(AUTOCONF)
+
+config.h: stamp-h
+ @if test ! -f $@; then \
+ rm -f stamp-h; \
+ $(MAKE) stamp-h; \
+ else :; fi
+stamp-h: $(srcdir)/config.in $(top_builddir)/config.status
+ @rm -f stamp-h stamp-hT
+ @echo timestamp > stamp-hT 2> /dev/null
+ cd $(top_builddir) \
+ && CONFIG_FILES= CONFIG_HEADERS=config.h:config.in \
+ $(SHELL) ./config.status
+ @mv stamp-hT stamp-h
+$(srcdir)/config.in: @MAINTAINER_MODE_TRUE@$(srcdir)/./stamp-h.in
+ @if test ! -f $@; then \
+ rm -f $(srcdir)/./stamp-h.in; \
+ $(MAKE) $(srcdir)/./stamp-h.in; \
+ else :; fi
+$(srcdir)/./stamp-h.in: $(top_srcdir)/configure.in $(ACLOCAL_M4) acconfig.h
+ @rm -f $(srcdir)/./stamp-h.in $(srcdir)/./stamp-h.inT
+ @echo timestamp > $(srcdir)/./stamp-h.inT 2> /dev/null
+ cd $(top_srcdir) && $(AUTOHEADER)
+ @mv $(srcdir)/./stamp-h.inT $(srcdir)/./stamp-h.in
+
+mostlyclean-hdr:
+
+clean-hdr:
+
+distclean-hdr:
+ -rm -f config.h
+
+maintainer-clean-hdr:
+
+mostlyclean-libLTLIBRARIES:
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+
+distclean-libLTLIBRARIES:
+
+maintainer-clean-libLTLIBRARIES:
+
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ echo " $(LIBTOOL) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$p; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+ done
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+ -rm -f *.$(OBJEXT)
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+
+maintainer-clean-libtool:
+
+mostlyclean-krextra:
+
+clean-krextra:
+ -rm -f ansi2knr
+
+distclean-krextra:
+
+maintainer-clean-krextra:
+ansi2knr: ansi2knr.$(OBJEXT)
+ $(LINK) ansi2knr.$(OBJEXT) $(LIBS)
+ansi2knr.$(OBJEXT): $(CONFIG_HEADER)
+
+
+mostlyclean-kr:
+ -rm -f *_.c
+
+clean-kr:
+
+distclean-kr:
+
+maintainer-clean-kr:
+
+gmp.dll: libgmp.a
+ dllwrap -mno-cygwin --target=i386-unknown-mingw32 \
+ --export-all --dllname gmp.dll --output-lib=libgmp_imp.a \
+ -o gmp.dll libgmp.a
+
+libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libgmp_la_LDFLAGS) $(libgmp_la_OBJECTS) $(libgmp_la_LIBADD) $(LIBS)
+
+libmp.la: $(libmp_la_OBJECTS) $(libmp_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libmp_la_LDFLAGS) $(libmp_la_OBJECTS) $(libmp_la_LIBADD) $(LIBS)
+.c.o:
+ $(COMPILE) -c $<
+.c.obj:
+ $(COMPILE) -c `cygpath -w $<`
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+assert_.c: assert.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/assert.c; then echo $(srcdir)/assert.c; else echo assert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > assert_.c
+compat_.c: compat.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/compat.c; then echo $(srcdir)/compat.c; else echo compat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > compat_.c
+errno_.c: errno.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/errno.c; then echo $(srcdir)/errno.c; else echo errno.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > errno_.c
+extract-dbl_.c: extract-dbl.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/extract-dbl.c; then echo $(srcdir)/extract-dbl.c; else echo extract-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > extract-dbl_.c
+insert-dbl_.c: insert-dbl.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/insert-dbl.c; then echo $(srcdir)/insert-dbl.c; else echo insert-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > insert-dbl_.c
+memory_.c: memory.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > memory_.c
+mp_bpl_.c: mp_bpl.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bpl.c; then echo $(srcdir)/mp_bpl.c; else echo mp_bpl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_bpl_.c
+mp_clz_tab_.c: mp_clz_tab.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_clz_tab.c; then echo $(srcdir)/mp_clz_tab.c; else echo mp_clz_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_clz_tab_.c
+mp_minv_tab_.c: mp_minv_tab.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_minv_tab.c; then echo $(srcdir)/mp_minv_tab.c; else echo mp_minv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_minv_tab_.c
+mp_set_fns_.c: mp_set_fns.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_set_fns.c; then echo $(srcdir)/mp_set_fns.c; else echo mp_set_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_set_fns_.c
+rand_.c: rand.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rand.c; then echo $(srcdir)/rand.c; else echo rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > rand_.c
+randclr_.c: randclr.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randclr.c; then echo $(srcdir)/randclr.c; else echo randclr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randclr_.c
+randlc_.c: randlc.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc.c; then echo $(srcdir)/randlc.c; else echo randlc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randlc_.c
+randlc2x_.c: randlc2x.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2x.c; then echo $(srcdir)/randlc2x.c; else echo randlc2x.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randlc2x_.c
+randraw_.c: randraw.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randraw.c; then echo $(srcdir)/randraw.c; else echo randraw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randraw_.c
+randsd_.c: randsd.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsd.c; then echo $(srcdir)/randsd.c; else echo randsd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randsd_.c
+randsdui_.c: randsdui.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsdui.c; then echo $(srcdir)/randsdui.c; else echo randsdui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randsdui_.c
+stack-alloc_.c: stack-alloc.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/stack-alloc.c; then echo $(srcdir)/stack-alloc.c; else echo stack-alloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > stack-alloc_.c
+version_.c: version.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > version_.c
+assert_.$(OBJEXT) assert_.lo compat_.$(OBJEXT) compat_.lo \
+errno_.$(OBJEXT) errno_.lo extract-dbl_.$(OBJEXT) extract-dbl_.lo \
+insert-dbl_.$(OBJEXT) insert-dbl_.lo memory_.$(OBJEXT) memory_.lo \
+mp_bpl_.$(OBJEXT) mp_bpl_.lo mp_clz_tab_.$(OBJEXT) mp_clz_tab_.lo \
+mp_minv_tab_.$(OBJEXT) mp_minv_tab_.lo mp_set_fns_.$(OBJEXT) \
+mp_set_fns_.lo rand_.$(OBJEXT) rand_.lo randclr_.$(OBJEXT) randclr_.lo \
+randlc_.$(OBJEXT) randlc_.lo randlc2x_.$(OBJEXT) randlc2x_.lo \
+randraw_.$(OBJEXT) randraw_.lo randsd_.$(OBJEXT) randsd_.lo \
+randsdui_.$(OBJEXT) randsdui_.lo stack-alloc_.$(OBJEXT) stack-alloc_.lo \
+version_.$(OBJEXT) version_.lo : $(ANSI2KNR)
+
+$(srcdir)/version.texi: @MAINTAINER_MODE_TRUE@stamp-vti
+ @:
+
+$(srcdir)/stamp-vti: gmp.texi $(top_srcdir)/configure.in
+ @echo "@set UPDATED `$(SHELL) $(srcdir)/mdate-sh $(srcdir)/gmp.texi`" > vti.tmp
+ @echo "@set EDITION $(VERSION)" >> vti.tmp
+ @echo "@set VERSION $(VERSION)" >> vti.tmp
+ @cmp -s vti.tmp $(srcdir)/version.texi \
+ || (echo "Updating $(srcdir)/version.texi"; \
+ cp vti.tmp $(srcdir)/version.texi)
+ -@rm -f vti.tmp
+ @cp $(srcdir)/version.texi $@
+
+mostlyclean-vti:
+ -rm -f vti.tmp
+
+clean-vti:
+
+distclean-vti:
+
+maintainer-clean-vti:
+ -@MAINTAINER_MODE_TRUE@rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi
+
+# gmp.info: gmp.texi version.texi
+# gmp.dvi: gmp.texi version.texi
+
+
+DVIPS = dvips
+
+.texi.info:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texi.dvi:
+ TEXINPUTS=$(srcdir):$$TEXINPUTS \
+ MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $<
+
+.texi:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texinfo.info:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texinfo:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texinfo.dvi:
+ TEXINPUTS=$(srcdir):$$TEXINPUTS \
+ MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $<
+
+.txi.info:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.txi.dvi:
+ TEXINPUTS=$(srcdir):$$TEXINPUTS \
+ MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $<
+
+.txi:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+.dvi.ps:
+ $(DVIPS) $< -o $@
+
+install-info-am: $(INFO_DEPS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(infodir)
+ @list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ d=$(srcdir); \
+ for ifile in `CDPATH=: && cd $$d && echo $$file $$file-[0-9] $$file-[0-9][0-9]`; do \
+ if test -f $$d/$$ifile; then \
+ echo " $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile"; \
+ $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile; \
+ else : ; fi; \
+ done; \
+ done
+ @$(POST_INSTALL)
+ @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \
+ list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ echo " install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file";\
+ install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file || :;\
+ done; \
+ else : ; fi
+
+uninstall-info:
+ $(PRE_UNINSTALL)
+ @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \
+ list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ echo " install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file"; \
+ install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file; \
+ done; \
+ else :; fi
+ @$(NORMAL_UNINSTALL)
+ @list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ (if cd $(DESTDIR)$(infodir); then \
+ echo " rm -f $$file $$file-[0-9] $$file-[0-9][0-9])"; \
+ rm -f $$file $$file-[0-9] $$file-[0-9][0-9]; \
+ else :; fi); \
+ done
+
+dist-info: $(INFO_DEPS)
+ list='$(INFO_DEPS)'; \
+ for base in $$list; do \
+ d=$(srcdir); \
+ for file in `CDPATH=: && cd $$d && eval echo $$base*`; do \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file; \
+ done; \
+ done
+
+mostlyclean-aminfo:
+ -rm -f gmp.aux gmp.cp gmp.cps gmp.dvi gmp.fn gmp.fns gmp.pgs gmp.ky \
+ gmp.kys gmp.ps gmp.log gmp.pg gmp.toc gmp.tp gmp.tps gmp.vr \
+ gmp.vrs gmp.op gmp.tr gmp.cv gmp.cn gmp.cm gmp.ov
+
+clean-aminfo:
+
+distclean-aminfo:
+
+maintainer-clean-aminfo:
+ cd $(srcdir) && for i in $(INFO_DEPS); do \
+ rm -f $$i; \
+ if test "`echo $$i-[0-9]*`" != "$$i-[0-9]*"; then \
+ rm -f $$i-[0-9]*; \
+ fi; \
+ done
+
+install-includeHEADERS: $(include_HEADERS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(includedir)
+ @list='$(include_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d= ; else d="$(srcdir)/"; fi; \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$f"; \
+ $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$f; \
+ done
+
+uninstall-includeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(include_HEADERS)'; for p in $$list; do \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " rm -f $(DESTDIR)$(includedir)/$$f"; \
+ rm -f $(DESTDIR)$(includedir)/$$f; \
+ done
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+
+all-recursive install-data-recursive install-exec-recursive \
+installdirs-recursive install-recursive uninstall-recursive \
+check-recursive installcheck-recursive info-recursive dvi-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \
+ rev="$$subdir $$rev"; \
+ if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \
+ done; \
+ test "$$dot_seen" = "no" && rev=". $$rev"; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) config.in $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)config.in$$unique$(LISP)$$tags" \
+ || etags $(ETAGS_ARGS) $$tags config.in $$unique $(LISP)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+
+
+# This target untars the dist file and tries a VPATH configuration. Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+ GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(AMTAR) xf -
+ chmod -R a-w $(distdir); chmod a+w $(distdir)
+ mkdir $(distdir)/=build
+ mkdir $(distdir)/=inst
+ chmod a-w $(distdir)
+ dc_install_base=`CDPATH=: && cd $(distdir)/=inst && pwd` \
+ && cd $(distdir)/=build \
+ && ../configure --srcdir=.. --prefix=$$dc_install_base \
+ && $(MAKE) $(AM_MAKEFLAGS) \
+ && $(MAKE) $(AM_MAKEFLAGS) dvi \
+ && $(MAKE) $(AM_MAKEFLAGS) check \
+ && $(MAKE) $(AM_MAKEFLAGS) install \
+ && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+ && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+ && test `find $$dc_install_base -type f -print | wc -l` -le 1 \
+ && $(MAKE) $(AM_MAKEFLAGS) dist \
+ && $(MAKE) $(AM_MAKEFLAGS) distclean \
+ && rm -f $(distdir).tar.gz \
+ && test `find . -type f -print | wc -l` -eq 0
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+ @banner="$(distdir).tar.gz is ready for distribution"; \
+ dashes=`echo "$$banner" | sed s/./=/g`; \
+ echo "$$dashes"; \
+ echo "$$banner"; \
+ echo "$$dashes"
+dist: distdir
+ -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
+ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
+ || chmod -R a+r $(distdir)
+ $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+dist-all: distdir
+ -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
+ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
+ || chmod -R a+r $(distdir)
+ $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+distdir: $(DISTFILES)
+ @if sed 15q $(srcdir)/NEWS | fgrep -e "$(VERSION)" > /dev/null; then :; else \
+ echo "NEWS not updated; not releasing" 1>&2; \
+ exit 1; \
+ fi
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+ mkdir $(distdir)
+ $(mkinstalldirs) $(distdir)/mpfr
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pR $$d/$$file $(distdir); \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+ for subdir in $(SUBDIRS); do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \
+ || exit 1; \
+ fi; \
+ done
+ $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-info
+ $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-hook
+info-am: $(INFO_DEPS)
+info: info-recursive
+dvi-am: $(DVIS)
+dvi: dvi-recursive
+check-am: all-am
+check: check-recursive
+installcheck-am:
+installcheck: installcheck-recursive
+all-recursive-am: config.h
+ $(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+install-exec-am: install-libLTLIBRARIES
+install-exec: install-exec-recursive
+
+install-data-am: install-info-am install-includeHEADERS
+install-data: install-data-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-recursive
+uninstall-am: uninstall-libLTLIBRARIES uninstall-info \
+ uninstall-includeHEADERS
+uninstall: uninstall-recursive
+all-am: Makefile $(INFO_DEPS) $(ANSI2KNR) $(LTLIBRARIES) $(HEADERS) \
+ config.h
+all-redirect: all-recursive-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
+installdirs: installdirs-recursive
+installdirs-am:
+ $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(infodir) \
+ $(DESTDIR)$(includedir)
+
+
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+ -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+maintainer-clean-generic:
+ -rm -f Makefile.in
+mostlyclean-am: mostlyclean-hdr mostlyclean-libLTLIBRARIES \
+ mostlyclean-compile mostlyclean-libtool \
+ mostlyclean-krextra mostlyclean-kr mostlyclean-vti \
+ mostlyclean-aminfo mostlyclean-tags mostlyclean-generic
+
+mostlyclean: mostlyclean-recursive
+
+clean-am: clean-hdr clean-libLTLIBRARIES clean-compile clean-libtool \
+ clean-krextra clean-kr clean-vti clean-aminfo \
+ clean-tags clean-generic mostlyclean-am
+
+clean: clean-recursive
+
+distclean-am: distclean-hdr distclean-libLTLIBRARIES distclean-compile \
+ distclean-libtool distclean-krextra distclean-kr \
+ distclean-vti distclean-aminfo distclean-tags \
+ distclean-generic clean-am
+ -rm -f libtool
+
+distclean: distclean-recursive
+ -rm -f config.status
+
+maintainer-clean-am: maintainer-clean-hdr \
+ maintainer-clean-libLTLIBRARIES \
+ maintainer-clean-compile maintainer-clean-libtool \
+ maintainer-clean-krextra maintainer-clean-kr \
+ maintainer-clean-vti maintainer-clean-aminfo \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f config.status
+
+.PHONY: mostlyclean-hdr distclean-hdr clean-hdr maintainer-clean-hdr \
+mostlyclean-libLTLIBRARIES distclean-libLTLIBRARIES \
+clean-libLTLIBRARIES maintainer-clean-libLTLIBRARIES \
+uninstall-libLTLIBRARIES install-libLTLIBRARIES mostlyclean-compile \
+distclean-compile clean-compile maintainer-clean-compile \
+mostlyclean-libtool distclean-libtool clean-libtool \
+maintainer-clean-libtool mostlyclean-krextra distclean-krextra \
+clean-krextra maintainer-clean-krextra mostlyclean-kr distclean-kr \
+clean-kr maintainer-clean-kr mostlyclean-vti distclean-vti clean-vti \
+maintainer-clean-vti install-info-am uninstall-info mostlyclean-aminfo \
+distclean-aminfo clean-aminfo maintainer-clean-aminfo \
+uninstall-includeHEADERS install-includeHEADERS install-recursive \
+uninstall-recursive install-data-recursive uninstall-data-recursive \
+install-exec-recursive uninstall-exec-recursive installdirs-recursive \
+uninstalldirs-recursive all-recursive check-recursive \
+installcheck-recursive info-recursive dvi-recursive \
+mostlyclean-recursive distclean-recursive clean-recursive \
+maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
+distclean-tags clean-tags maintainer-clean-tags distdir info-am info \
+dvi-am dvi check check-am installcheck-am installcheck all-recursive-am \
+install-exec-am install-exec install-data-am install-data install-am \
+install uninstall-am uninstall all-redirect all-am all install-strip \
+installdirs-am installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+# Don't ship CVS directories or emacs backups.
+dist-hook:
+ -find $(distdir) \( -name CVS -type d \) -o -name "*.~*" \
+ | xargs rm -rf
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rts/gmp/NEWS b/rts/gmp/NEWS
new file mode 100644
index 0000000000..3b549d59f3
--- /dev/null
+++ b/rts/gmp/NEWS
@@ -0,0 +1,136 @@
+Changes between MP version 3.1 and 3.1.1
+
+* Bug fixes for division (rare), mpf_get_str, FFT, and miscellaneous minor
+ things.
+
+Changes between MP version 3.0 and 3.1
+
+* Bug fixes.
+* Improved `make check' running more tests.
+* Tuned algorithm cutoff points for many machines. This will improve speed for
+ a lot of operations, in some cases by a large amount.
+* Major speed improvments: Alpha 21264.
+* Some speed improvments: Cray vector computers, AMD K6 and Athlon, Intel P5
+ and Pentium Pro/II/III.
+* The mpf_get_prec function now works as it did in GMP 2.
+* New utilities for auto-tuning and speed measuring.
+* Multiplication now optionally uses FFT for very large operands. (To enable
+ it, pass --enable-fft to configure.)
+* Support for new systems: Solaris running on x86, FreeBSD 5, HP-UX 11, Cray
+ vector computers, Rhapsody, Nextstep/Openstep, MacOS.
+* Support for shared libraries on 32-bit HPPA.
+* New integer functions: mpz_mul_si, mpz_odd_p, mpz_even_p.
+* New Kronecker symbol functions: mpz_kronecker_si, mpz_kronecker_ui,
+ mpz_si_kronecker, mpz_ui_kronecker.
+* New rational functions: mpq_out_str, mpq_swap.
+* New float functions: mpf_swap.
+* New mpn functions: mpn_divexact_by3c, mpn_tdiv_qr.
+* New EXPERIMENTAL function layer for accurate floating-point arithmetic, mpfr.
+ To try it, pass --enable-mpfr to configure. See the mpfr subdirectory for
+ more information; it is not documented in the main GMP manual.
+
+Changes between MP version 3.0 and 3.0.1
+
+* Memory leaks in gmp_randinit and mpz_probab_prime_p fixed.
+* Documentation for gmp_randinit fixed. Misc documentation errors fixed.
+
+Changes between MP version 2.0 and 3.0
+
+* Source level compatibility with past releases (except mpn_gcd).
+* Bug fixes.
+* Much improved speed thanks to both host independent and host dependent
+ optimizations.
+* Switch to autoconf/automake/libtool.
+* Support for building libgmp as a shared library.
+* Multiplication and squaring using 3-way Toom-Cook.
+* Division using the Burnikel-Ziegler method.
+* New functions computing binomial coefficients: mpz_bin_ui, mpz_bin_uiui.
+* New function computing Fibonacci numbers: mpz_fib_ui.
+* New random number generators: mpf_urandomb, mpz_rrandomb, mpz_urandomb,
+ mpz_urandomm, gmp_randclear, gmp_randinit, gmp_randinit_lc_2exp, gmp_randseed,
+ gmp_randseed_ui.
+* New function for quickly extracting limbs: mpz_getlimbn.
+* New functions performing integer size tests: mpz_fits_sint_p,
+ mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p, mpz_fits_ulong_p,
+ mpz_fits_ushort_p.
+* New mpf functions: mpf_ceil, mpf_floor, mpf_pow_ui, mpf_trunc.
+* New mpq function: mpq_set_d.
+* New mpz functions: mpz_addmul_ui, mpz_cmpabs, mpz_cmpabs_ui, mpz_lcm,
+ mpz_nextprime, mpz_perfect_power_p, mpz_remove, mpz_root, mpz_swap,
+ mpz_tdiv_ui, mpz_tstbit, mpz_xor.
+* New mpn function: mpn_divexact_by3.
+* New CPU support: DEC Alpha 21264, AMD K6 and Athlon, HPPA 2.0 and 64,
+ Intel Pentium Pro and Pentium-II/III, Sparc 64, PowerPC 64.
+* Almost 10 times faster mpz_invert and mpn_gcdext.
+* The interface of mpn_gcd has changed.
+* Better support for MIPS R4x000 and R5000 under Irix 6.
+* Improved support for SPARCv8 and SPARCv9 processors.
+
+Changes between MP version 2.0 and 2.0.2
+
+* Many bug fixes.
+
+Changes between MP version 1.3.2 and 2.0
+
+* Division routines in the mpz class have changed. There are three classes of
+ functions, that rounds the quotient to -infinity, 0, and +infinity,
+ respectively. The first class of functions have names that begin with
+ mpz_fdiv (f is short for floor), the second class' names begin with mpz_tdiv
+ (t is short for trunc), and the third class' names begin with mpz_cdiv (c is
+ short for ceil).
+
+ The old division routines beginning with mpz_m are similar to the new
+ mpz_fdiv, with the exception that some of the new functions return useful
+ values.
+
+ The old function names can still be used. All the old functions names will
+ now do floor division, not trunc division as some of them used to. This was
+ changed to make the functions more compatible with common mathematical
+ practice.
+
+ The mpz_mod and mpz_mod_ui functions now compute the mathematical mod
+ function. I.e., the sign of the 2nd argument is ignored.
+
+* The mpq assignment functions do not canonicalize their results. A new
+ function, mpq_canonicalize must be called by the user if the result is not
+ known to be canonical.
+* The mpn functions are now documented. These functions are intended for
+ very time critical applications, or applications that need full control over
+ memory allocation. Note that the mpn interface is irregular and hard to
+ use.
+* New functions for arbitrary precision floating point arithmetic. Names
+ begin with `mpf_'. Associated type mpf_t.
+* New and improved mpz functions, including much faster GCD, fast exact
+ division (mpz_divexact), bit scan (mpz_scan0 and mpz_scan1), and number
+ theoretical functions like Jacobi (mpz_jacobi) and multiplicative inverse
+ (mpz_invert).
+* New variable types (mpz_t and mpq_t) are available that makes syntax of
+ mpz and mpq calls nicer (no need for & before variables). The MP_INT and
+ MP_RAT types are still available for compatibility.
+* Uses GNU configure. This makes it possible to choose target architecture
+ and CPU variant, and to compile into a separate object directory.
+* Carefully optimized assembly for important inner loops. Support for DEC
+ Alpha, Amd 29000, HPPA 1.0 and 1.1, Intel Pentium and generic x86, Intel
+ i960, Motorola MC68000, MC68020, MC88100, and MC88110, Motorola/IBM
+ PowerPC, National NS32000, IBM POWER, MIPS R3000, R4000, SPARCv7,
+ SuperSPARC, generic SPARCv8, and DEC VAX. Some support also for ARM,
+ Clipper, IBM ROMP (RT), and Pyramid AP/XP.
+* Faster. Thanks to the assembler code, new algorithms, and general tuning.
+ In particular, the speed on machines without GCC is improved.
+* Support for machines without alloca.
+* Now under the LGPL.
+
+INCOMPATIBILITIES BETWEEN GMP 1 AND GMP 2
+
+* mpq assignment functions do not canonicalize their results.
+* mpz division functions round differently.
+* mpz mod functions now really compute mod.
+* mpz_powm and mpz_powm_ui now really use mod for reduction.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/README b/rts/gmp/README
new file mode 100644
index 0000000000..177c97eb12
--- /dev/null
+++ b/rts/gmp/README
@@ -0,0 +1,84 @@
+
+ THE GNU MP LIBRARY
+
+
+GNU MP is a library for arbitrary precision arithmetic, operating on signed
+integers, rational numbers, and floating point numbers. It has a rich set of
+functions, and the functions have a regular interface.
+
+GNU MP is designed to be as fast as possible, both for small operands and huge
+operands. The speed is achieved by using fullwords as the basic arithmetic
+type, by using fast algorithms, with carefully optimized assembly code for the
+most common inner loops for lots of CPUs, and by a general emphasis on speed
+(instead of simplicity or elegance).
+
+GNU MP is believed to be faster than any other similar library. Its advantage
+increases with operand sizes for certain operations, since GNU MP in many
+cases has asymptotically faster algorithms.
+
+GNU MP is free software and may be freely copied on the terms contained in the
+files COPYING.LIB and COPYING (most of GNU MP is under the former, some under
+the latter).
+
+
+
+ OVERVIEW OF GNU MP
+
+There are five classes of functions in GNU MP.
+
+ 1. Signed integer arithmetic functions (mpz). These functions are intended
+ to be easy to use, with their regular interface. The associated type is
+ `mpz_t'.
+
+ 2. Rational arithmetic functions (mpq). For now, just a small set of
+ functions necessary for basic rational arithmetics. The associated type
+ is `mpq_t'.
+
+ 3. Floating-point arithmetic functions (mpf). If the C type `double'
+ doesn't give enough precision for your application, declare your
+ variables as `mpf_t' instead, set the precision to any number desired,
+ and call the functions in the mpf class for the arithmetic operations.
+
+ 4. Positive-integer, hard-to-use, very low overhead functions are in the
+ mpn class. No memory management is performed. The caller must ensure
+ enough space is available for the results. The set of functions is not
+ regular, nor is the calling interface. These functions accept input
+ arguments in the form of pairs consisting of a pointer to the least
+ significant word, and an integral size telling how many limbs (= words)
+ the pointer points to.
+
+ Almost all calculations, in the entire package, are made by calling these
+ low-level functions.
+
+ 5. Berkeley MP compatible functions.
+
+ To use these functions, include the file "mp.h". You can test if you are
+ using the GNU version by testing if the symbol __GNU_MP__ is defined.
+
+For more information on how to use GNU MP, please refer to the documentation.
+It is composed from the file gmp.texi, and can be displayed on the screen or
+printed. How to do that, as well how to build the library, is described in
+the INSTALL file in this directory.
+
+
+
+ REPORTING BUGS
+
+If you find a bug in the library, please make sure to tell us about it!
+
+You should first check the GNU MP web pages at http://www.swox.com/gmp/,
+under "Status of the current release". There will be patches for all known
+serious bugs there.
+
+Report bugs to bug-gmp@gnu.org. What information is needed in a good bug
+report is described in the manual. The same address can be used for
+suggesting modifications and enhancements.
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 78
+End:
diff --git a/rts/gmp/acconfig.h b/rts/gmp/acconfig.h
new file mode 100644
index 0000000000..dfb1b0b039
--- /dev/null
+++ b/rts/gmp/acconfig.h
@@ -0,0 +1,92 @@
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+@TOP@
+
+/* Define if a limb is long long. */
+#undef _LONG_LONG_LIMB
+
+/* Define if we have native implementation of function. */
+#undef HAVE_NATIVE_
+#undef HAVE_NATIVE_mpn_add
+#undef HAVE_NATIVE_mpn_add_1
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addmul_1
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addsub_n
+#undef HAVE_NATIVE_mpn_addsub_nc
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdivmod
+#undef HAVE_NATIVE_mpn_cmp
+#undef HAVE_NATIVE_mpn_com_n
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_divrem_newton
+#undef HAVE_NATIVE_mpn_divrem_classic
+#undef HAVE_NATIVE_mpn_dump
+#undef HAVE_NATIVE_mpn_gcd
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_gcdext
+#undef HAVE_NATIVE_mpn_get_str
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mul
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_mul_n
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_perfect_square_p
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_random2
+#undef HAVE_NATIVE_mpn_random
+#undef HAVE_NATIVE_mpn_rawrandom
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_scan0
+#undef HAVE_NATIVE_mpn_scan1
+#undef HAVE_NATIVE_mpn_set_str
+#undef HAVE_NATIVE_mpn_sqrtrem
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sub
+#undef HAVE_NATIVE_mpn_sub_1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_submul_1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_udiv_w_sdiv
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n
diff --git a/rts/gmp/acinclude.m4 b/rts/gmp/acinclude.m4
new file mode 100644
index 0000000000..a02394a963
--- /dev/null
+++ b/rts/gmp/acinclude.m4
@@ -0,0 +1,835 @@
+dnl GMP specific autoconf macros
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl GMP_HEADER_GETVAL(NAME,FILE)
+dnl ----------------------------
+dnl Expand to the value of a "#define NAME" from the given FILE.
+dnl The regexps here aren't very rugged, but are enough for gmp.
+dnl /dev/null as a parameter prevents a hang if $2 is accidentally omitted.
+
+define(GMP_HEADER_GETVAL,
+[patsubst(patsubst(
+esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]),
+[^.*$1[ ]+],[]),
+[[
+ ]*$],[])])
+
+
+dnl GMP_VERSION
+dnl -----------
+dnl The gmp version number, extracted from the #defines in gmp.h.
+dnl Two digits like 3.0 if patchlevel <= 0, or three digits like 3.0.1 if
+dnl patchlevel > 0.
+
+define(GMP_VERSION,
+[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp.h)[]dnl
+.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp.h)[]dnl
+ifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h) > 0),1,
+[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h)])])
+
+
+dnl GMP_PROG_M4()
+dnl -------------
+dnl
+dnl Find a working m4, either in $PATH or likely locations, and setup $M4
+dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user
+dnl choice and is accepted with no checks. GMP_PROG_M4 is like
+dnl AC_PATH_PROG or AC_CHECK_PROG, but it tests each m4 found to see if
+dnl it's good enough.
+dnl
+dnl See mpn/asm-defs.m4 for details on the known bad m4s.
+
+AC_DEFUN(GMP_PROG_M4,
+[AC_CACHE_CHECK([for suitable m4],
+ gmp_cv_prog_m4,
+[if test -n "$M4"; then
+ gmp_cv_prog_m4="$M4"
+else
+ cat >conftest.m4 <<\EOF
+dnl must protect this against being expanded during autoconf m4!
+[define(dollarhash,``$][#'')dnl
+ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $][# not supported (SunOS /usr/bin/m4)
+'')dnl
+ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')dnl
+ifelse(t1`'t2,YY,`good
+')dnl]
+EOF
+ echo "trying m4" 1>&AC_FD_CC
+ gmp_tmp_val="`(m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="m4"
+ else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="$PATH:/usr/5bin"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ echo "trying $ac_dir/m4" 1>&AC_FD_CC
+ gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="$ac_dir/m4"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ if test -z "$gmp_cv_prog_m4"; then
+ AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).])
+ fi
+ fi
+ rm -f conftest.m4
+fi])
+M4="$gmp_cv_prog_m4"
+AC_SUBST(M4)
+])
+
+
+dnl GMP_PROG_CC_FIND([CC_LIST], [REQ_64BIT_CC])
+dnl Find first working compiler in CC_LIST.
+dnl If REQ_64BIT_CC is "yes", the compiler is required to be able to
+dnl produce 64-bit code.
+dnl NOTE: If a compiler needs any special flags for producing 64-bit code,
+dnl these have to be found in shell variable `gmp_cflags64_{cc}', where `{cc}'
+dnl is the name of the compiler.
+dnl Set CC to the name of the first working compiler.
+dnl If a 64-bit compiler is found, set CC64 to the name of the compiler and
+dnl CFLAGS64 to flags to use.
+dnl This macro does not test if any of the compilers found is a GNU compiler.
+dnl To do this, when you have finally made up your mind on which one to use,
+dnl and set CC accordingly, invoke [GMP_PROG_CC_SELECT]. That macro will
+dnl also make sure that your selection of CFLAGS is valid.
+dnl
+AC_DEFUN(GMP_PROG_CC_FIND,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+ifelse([$1], , gmp_cc_list="gcc cc", gmp_cc_list="[$1]")
+ifelse([$2], , gmp_req_64bit_cc="no", gmp_req_64bit_cc="[$2]")
+
+CC32=
+CC64=
+for c in $gmp_cc_list; do
+ # Avoid cache hits.
+ unset CC
+ unset ac_cv_prog_CC
+ AC_CHECK_TOOL(CC, $c, $c)
+ if test -n "$CC"; then
+ eval c_flags=\$gmp_cflags_$c
+ GMP_PROG_CC_WORKS($CC, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" != "yes"; then
+ continue
+ fi
+
+ # Save first working compiler, whether 32- or 64-bit capable.
+ if test -z "$CC32"; then
+ CC32="$CC"
+ fi
+ if test "$gmp_req_64bit_cc" = "yes"; then
+ eval c_flags=\$gmp_cflags64_$c
+
+ # Verify that the compiler works in 64-bit mode as well.
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link.
+ GMP_PROG_CC_WORKS($c, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" = "yes"; then
+ GMP_CHECK_CC_64BIT($c, $c_flags)
+ if test "$gmp_cv_cc_64bit" = "yes"; then
+ test -z "$CC64" && CC64="$c"
+ test -z "$CFLAGS64" && CFLAGS64="$c_flags"
+ # We have CC64 so we're done.
+ break
+ fi
+ fi
+ else
+ # We have CC32, and we don't need a 64-bit compiler so we're done.
+ break
+ fi
+ fi
+done
+CC="$CC32"
+])dnl
+
+dnl GMP_PROG_CC_SELECT
+dnl Check that `CC' works with `CFLAGS'. Check if `CC' is a GNU compiler.
+dnl Cache the result as `ac_cv_prog_CC'.
+AC_DEFUN(GMP_PROG_CC_SELECT,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+AC_PROG_CC_WORKS
+AC_PROG_CC_GNU
+
+if test "$ac_cv_prog_gcc" = "yes"; then
+ GCC=yes
+else
+ GCC=
+fi
+
+# Set CFLAGS if not already set.
+if test -z "$CFLAGS"; then
+ CFLAGS="-g"
+ if test "$GCC" = "yes"; then
+ CFLAGS="$CFLAGS -O2"
+ fi
+fi
+
+AC_SUBST(CC)
+AC_CACHE_VAL(ac_cv_prog_CC, ac_cv_prog_CC="$CC")
+AC_PROVIDE([AC_PROG_CC])
+])dnl
+
+dnl GMP_CHECK_CC_64BIT(cc, cflags64)
+dnl Find out if `CC' can produce 64-bit code.
+dnl Requires NM to be set to nm for target.
+dnl FIXME: Cache result.
+AC_DEFUN(GMP_CHECK_CC_64BIT,
+[
+ gmp_tmp_CC_save="$CC"
+ CC="[$1]"
+ AC_MSG_CHECKING([whether the C compiler ($CC) is 64-bit capable])
+ if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+ fi
+ gmp_tmp_CFLAGS_save="$CFLAGS"
+ CFLAGS="[$2]"
+
+ case "$target" in
+ hppa2.0*-*-*)
+ # FIXME: If gcc is installed under another name than "gcc", we will
+ # test the wrong thing.
+ if test "$CC" != "gcc"; then
+ dnl Let compiler version A.10.32.30 or higher be ok.
+ dnl Bad compiler output:
+ dnl ccom: HP92453-01 G.10.32.05 HP C Compiler
+ dnl Good compiler output:
+ dnl ccom: HP92453-01 A.10.32.30 HP C Compiler
+ echo >conftest.c
+ gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"`
+ rm conftest*
+ gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'`
+ gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+ gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+ gmp_cv_cc_64bit=no
+ test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \
+ && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \
+ && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \
+ && gmp_cv_cc_64bit=yes
+ else # gcc
+ # FIXME: Compile a minimal file and determine if the resulting object
+ # file is an ELF file. If so, gcc can produce 64-bit code.
+ # Do we have file(1) for target?
+ gmp_cv_cc_64bit=no
+ fi
+ ;;
+ mips-sgi-irix6.*)
+ # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit
+ # arithmetic but not 64-bit pointers, so the general test for sizeof
+ # (void *) is not valid.
+ # Simply try to compile an empty main. If that succeeds return
+ # true.
+ AC_TRY_COMPILE( , ,
+ gmp_cv_cc_64bit=yes, gmp_cv_cc_64bit=no,
+ gmp_cv_cc_64bit=no)
+ ;;
+ *-*-*)
+ # Allocate an array of size sizeof (void *) and use nm to determine its
+ # size. We depend on the first declared variable being put at address 0.
+ cat >conftest.c <<EOF
+[char arr[sizeof (void *)]={0};
+char post=0;]
+EOF
+ gmp_compile="$CC $CFLAGS -c conftest.c 1>&AC_FD_CC"
+ if AC_TRY_EVAL(gmp_compile); then
+ changequote(<,>)dnl
+ gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "8"; then
+ gmp_cv_cc_64bit=yes
+ else
+ gmp_cv_cc_64bit=no
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+ gmp_cv_cc_64bit=no
+ fi
+ rm -f conftest*
+ ;;
+ esac
+
+ CC="$gmp_tmp_CC_save"
+ CFLAGS="$gmp_tmp_CFLAGS_save"
+ AC_MSG_RESULT($gmp_cv_cc_64bit)
+])dnl
+
+dnl GMP_INIT([M4-DEF-FILE])
+dnl
+AC_DEFUN(GMP_INIT,
+[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]")
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
+test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
+test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p
+])dnl
+
+dnl GMP_FINISH
+dnl ----------
+dnl Create config.m4 from its accumulated parts.
+dnl
+dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include
+dnl of config.m4 is harmless.
+dnl
+dnl A separate ifdef on the angle bracket quoted part ensures the quoting
+dnl style there is respected. The basic defines from gmp_tmpconfigm4 are
+dnl fully quoted but are still put under an ifdef in case any have been
+dnl redefined by one of the m4 include files.
+dnl
+dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't
+dnl work, since it'd interpret parentheses and quotes in dnl comments, and
+dnl having a whole file as a macro argument would overflow the string space
+dnl on BSD m4.
+
+AC_DEFUN(GMP_FINISH,
+[AC_REQUIRE([GMP_INIT])
+echo "creating $gmp_configm4"
+echo ["dnl $gmp_configm4. Generated automatically by configure."] > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+ echo ["changequote(<,>)dnl"] >> $gmp_configm4
+ echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4
+ cat $gmp_tmpconfigm4 >> $gmp_configm4
+ echo [">)"] >> $gmp_configm4
+ echo ["changequote(\`,')dnl"] >> $gmp_configm4
+ rm $gmp_tmpconfigm4
+fi
+echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+ cat $gmp_tmpconfigm4i >> $gmp_configm4
+ rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+ cat $gmp_tmpconfigm4p >> $gmp_configm4
+ rm $gmp_tmpconfigm4p
+fi
+echo ["')"] >> $gmp_configm4
+echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4
+])dnl
+
+dnl GMP_INCLUDE(FILE)
+AC_DEFUN(GMP_INCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["include(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_SINCLUDE(FILE)
+AC_DEFUN(GMP_SINCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["sinclude(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION])
+dnl [ Define M4 macro MACRO as DEFINITION in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Mind the quoting! No shell variables will get expanded. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+dnl [ config.m4 uses `<' and '>' as quote characters for all defines. ]
+AC_DEFUN(GMP_DEFINE,
+[AC_REQUIRE([GMP_INIT])
+echo ['define(<$1>, <$2>)'] >> ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_DEFINE_RAW(STRING, [, LOCATION])
+dnl [ Put STRING in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+AC_DEFUN(GMP_DEFINE_RAW,
+[AC_REQUIRE([GMP_INIT])
+echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_CHECK_ASM_LABEL_SUFFIX
+dnl Should a label have a colon or not?
+AC_DEFUN(GMP_CHECK_ASM_LABEL_SUFFIX,
+[AC_CACHE_CHECK([what assembly label suffix to use],
+ gmp_cv_check_asm_label_suffix,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_label_suffix=[""] ;;
+ *) gmp_cv_check_asm_label_suffix=[":"] ;;
+esac
+])
+echo ["define(<LABEL_SUFFIX>, <\$][1$gmp_cv_check_asm_label_suffix>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_UNDERSCORE([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Shamelessly borrowed from glibc.
+AC_DEFUN(GMP_CHECK_ASM_UNDERSCORE,
+[AC_CACHE_CHECK([if symbols are prefixed by underscore],
+ gmp_cv_check_asm_underscore,
+[cat > conftest.$ac_ext <<EOF
+dnl This sometimes fails to find confdefs.h, for some reason.
+dnl [#]line __oline__ "[$]0"
+[#]line __oline__ "configure"
+#include "confdefs.h"
+int underscore_test() {
+return; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ if grep _underscore_test conftest* >/dev/null; then
+ gmp_cv_check_asm_underscore=yes
+ else
+ gmp_cv_check_asm_underscore=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_underscore" = "yes"; then
+ GMP_DEFINE(GSYM_PREFIX, [_])
+ ifelse([$1], , :, [$1])
+else
+ GMP_DEFINE(GSYM_PREFIX, [])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_ALIGN_LOG([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Is parameter to `.align' logarithmic?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_LOG,
+[AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([if .align assembly directive is logarithmic],
+ gmp_cv_check_asm_align_log,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ .align 4
+ $gmp_cv_check_asm_globl foo
+ .byte 1
+ .align 4
+foo$gmp_cv_check_asm_label_suffix
+ .byte 2
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+ gmp_cv_check_asm_align_log=yes
+ else
+ gmp_cv_check_asm_align_log=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)"])
+if test "$gmp_cv_check_asm_align_log" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+
+dnl GMP_CHECK_ASM_ALIGN_FILL_0x90
+dnl -----------------------------
+dnl Determine whether a ",0x90" suffix works on a .align directive.
+dnl This is only meant for use on x86, where 0x90 is a "nop".
+dnl
+dnl Old gas, eg. 1.92.3 - needs ",0x90" or else the fill is an invalid 0x00.
+dnl New gas, eg. 2.91 - generates the good multibyte nop fills even when
+dnl ",0x90" is given.
+dnl Solaris 2.6 as - doesn't allow ",0x90", gives a fatal error.
+dnl Solaris 2.8 as - gives a warning for ",0x90", no ill effect.
+dnl
+dnl Note that both solaris "as"s only care about ",0x90" if they actually
+dnl have to use it to fill something, hence the .byte in the sample. It's
+dnl only the second .align that provokes an error or warning.
+dnl
+dnl We prefer to suppress the warning from solaris 2.8 to stop anyone
+dnl worrying something might be wrong.
+
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_FILL_0x90,
+[AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text],
+ gmp_cv_check_asm_align_fill_0x90,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ .align 4, 0x90
+ .byte 0
+ .align 4, 0x90
+EOF
+gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`"
+if test $? = 0; then
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then
+ echo "Supressing this warning by omitting 0x90" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+ else
+ gmp_cv_check_asm_align_fill_0x90=yes
+ fi
+else
+ echo "Non-zero exit code" 1>&AC_FD_CC
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(
+["define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)"])
+])
+
+
+dnl GMP_CHECK_ASM_TEXT
+AC_DEFUN(GMP_CHECK_ASM_TEXT,
+[AC_CACHE_CHECK([how to switch to text section], gmp_cv_check_asm_text,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_text={".csect .text[PR]"}
+ changequote([, ])
+ ;;
+ *-*-hpux*) gmp_cv_check_asm_text=[".code"] ;;
+ *) gmp_cv_check_asm_text=[".text"] ;;
+esac
+])
+echo ["define(<TEXT>, <$gmp_cv_check_asm_text>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_DATA
+dnl Can we say `.data'?
+AC_DEFUN(GMP_CHECK_ASM_DATA,
+[AC_CACHE_CHECK([how to switch to data section], gmp_cv_check_asm_data,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_data={".csect .data[RW]"}
+ changequote([, ])
+ ;;
+ *) gmp_cv_check_asm_data=[".data"] ;;
+esac
+])
+echo ["define(<DATA>, <$gmp_cv_check_asm_data>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_GLOBL
+dnl Can we say `.global'?
+AC_DEFUN(GMP_CHECK_ASM_GLOBL,
+[AC_CACHE_CHECK([how to export a symbol], gmp_cv_check_asm_globl,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_globl=[".export"] ;;
+ *) gmp_cv_check_asm_globl=[".globl"] ;;
+esac
+])
+echo ["define(<GLOBL>, <$gmp_cv_check_asm_globl>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_TYPE
+dnl Can we say `.type'?
+AC_DEFUN(GMP_CHECK_ASM_TYPE,
+[AC_CACHE_CHECK([how the .type assembly directive should be used],
+gmp_cv_check_asm_type,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+for gmp_tmp_prefix in @ \# %; do
+ echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+ if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_type="[.type \$][1,${gmp_tmp_prefix}\$][2]"
+ break
+ fi
+done
+if test -z "$gmp_cv_check_asm_type"; then
+ gmp_cv_check_asm_type="[dnl]"
+fi
+])
+echo ["define(<TYPE>, <$gmp_cv_check_asm_type>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_SIZE
+dnl Can we say `.size'?
+AC_DEFUN(GMP_CHECK_ASM_SIZE,
+[AC_CACHE_CHECK([if the .size assembly directive works], gmp_cv_check_asm_size,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+echo ' .size sym,1' > conftest.s
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_size="[.size \$][1,\$][2]"
+else
+ gmp_cv_check_asm_size="[dnl]"
+fi
+])
+echo ["define(<SIZE>, <$gmp_cv_check_asm_size>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_LSYM_PREFIX
+dnl What is the prefix for a local label?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_LSYM_PREFIX,
+[AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([what prefix to use for a local label],
+gmp_cv_check_asm_lsym_prefix,
+[if test -z "$NM"; then
+ echo; echo ["$0: fatal: need nm"]
+ exit 1
+fi
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+gmp_cv_check_asm_lsym_prefix="L"
+for gmp_tmp_pre in L .L $ L$; do
+ cat > conftest.s <<EOF
+dummy${gmp_cv_check_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ $NM conftest.o >/dev/null 2>&1
+ gmp_rc=$?
+ if test "$gmp_rc" != "0"; then
+ echo "configure: $NM failure, using default"
+ break
+ fi
+ if $NM conftest.o | grep gurkmacka >/dev/null; then true; else
+ gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre"
+ break
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+ # Use default.
+ fi
+done
+rm -f conftest*
+])
+echo ["define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_W32
+dnl How to [define] a 32-bit word.
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_W32,
+[AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([how to [define] a 32-bit word],
+ gmp_cv_check_asm_w32,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+
+# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+# that it's at 0x0. We'll have to declare another symbol before the
+# .long/.word and look at the distance between the two symbols. The
+# only problem is that the sed expression(s) barfs (on Solaris, for
+# example) for the symbol with value 0. For now, HPUX uses .word.
+
+case "$target" in
+ *-*-hpux*)
+ gmp_cv_check_asm_w32=".word"
+ ;;
+ *-*-*)
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+ for gmp_tmp_op in .long .word; do
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ $gmp_cv_check_asm_globl foo
+ $gmp_tmp_op 0
+foo${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "4"; then
+ gmp_cv_check_asm_w32="$gmp_tmp_op"
+ break
+ fi
+ fi
+ done
+ ;;
+esac
+
+if test -z "$gmp_cv_check_asm_w32"; then
+ echo; echo ["configure: $0: fatal: do not know how to define a 32-bit word"]
+ exit 1
+fi
+rm -f conftest*
+])
+echo ["define(<W32>, <$gmp_cv_check_asm_w32>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_MMX([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+dnl Can we assemble MMX insns?
+AC_DEFUN(GMP_CHECK_ASM_MMX,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler knows about MMX instructions],
+ gmp_cv_check_asm_mmx,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ por %mm0, %mm0
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_mmx=yes
+else
+ gmp_cv_check_asm_mmx=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_mmx" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ AC_MSG_WARN([+----------------------------------------------------------])
+ AC_MSG_WARN([| WARNING WARNING WARNING])
+ AC_MSG_WARN([| Target CPU has MMX code, but it can't be assembled by])
+ AC_MSG_WARN([| $CCAS $CFLAGS])
+ AC_MSG_WARN([| Non-MMX replacements will be used.])
+ AC_MSG_WARN([| This will be an inferior build.])
+ AC_MSG_WARN([+----------------------------------------------------------])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_SHLDL_CL([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+AC_DEFUN(GMP_CHECK_ASM_SHLDL_CL,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler takes cl with shldl],
+ gmp_cv_check_asm_shldl_cl,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ shldl %cl, %eax, %ebx
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_shldl_cl=yes
+else
+ gmp_cv_check_asm_shldl_cl=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_PROG_CC_WORKS(CC, CFLAGS, ACTION-IF-WORKS, [ACTION-IF-NOT-WORKS])
+dnl Check if CC can compile and link. Perform various target specific tests.
+dnl FIXME: Require `$target'.
+AC_DEFUN(GMP_PROG_CC_WORKS,
+[AC_LANG_C dnl Note: Destructive.
+CC="[$1]"
+CFLAGS="[$2]"
+AC_MSG_CHECKING([if the C compiler ($CC) works with flags $CFLAGS])
+
+# Simple test for all targets.
+AC_TRY_COMPILER([int main(){return(0);}],
+ tmp_works, tmp_cross)
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ AC_TRY_COMPILE( , [} void *g(); void *f() { return g(); } int bar(){],
+ tmp_works=yes, tmp_works=no)
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ [$3]
+else
+ ifelse([$4], , :, [$4])
+fi
+
+AC_MSG_RESULT($tmp_works)
+])dnl
+
+
+dnl GMP_C_ANSI2KNR
+dnl --------------
+dnl Setup to use ansi2knr if necessary.
+dnl
+dnl The test here is simply that if an ANSI style function works then
+dnl ansi2knr isn't needed. The normal tests for whether $CC works mean we
+dnl don't need to worry here about anything badly broken.
+dnl
+dnl AM_C_PROTOTYPES is the normal way to set up ansi2knr, but (in automake
+dnl March 2000) it gives the wrong answer on a C++ compiler because its
+dnl test requires that the compiler accept both ANSI and K&R, or otherwise
+dnl ansi2knr is used. A C++ compiler fails on the K&R part, which makes
+dnl AM_C_PROTOTYPES think it needs ansi2knr! GMP has no bare K&R so we
+dnl only need ANSI or K&R to work, not both.
+
+AC_DEFUN(GMP_C_ANSI2KNR,
+[AC_CACHE_CHECK([if ansi2knr should be used],
+ gmp_cv_c_ansi2knr,
+[cat >conftest.c <<EOF
+int main (int argc, char *argv[]) { return 0; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ gmp_cv_c_ansi2knr=no
+else
+ gmp_cv_c_ansi2knr=yes
+fi
+rm -f conftest.*
+])
+if test $gmp_cv_c_ansi2knr = no; then
+ U= ANSI2KNR=
+else
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+ AC_HEADER_STDC
+ AC_CHECK_HEADERS(string.h)
+fi
+AC_SUBST(U)
+AC_SUBST(ANSI2KNR)
+])
+
+
+dnl Deal with bad synchronization of Autoconf with Libtool.
+AC_DEFUN(AC_CANONICAL_BUILD, [_AC_CANONICAL_BUILD])
+AC_DEFUN(AC_CHECK_TOOL_PREFIX, [_AC_CHECK_TOOL_PREFIX])
diff --git a/rts/gmp/aclocal.m4 b/rts/gmp/aclocal.m4
new file mode 100644
index 0000000000..086c77915c
--- /dev/null
+++ b/rts/gmp/aclocal.m4
@@ -0,0 +1,1963 @@
+dnl aclocal.m4 generated automatically by aclocal 1.4a
+
+dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+dnl PARTICULAR PURPOSE.
+
+dnl GMP specific autoconf macros
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl GMP_HEADER_GETVAL(NAME,FILE)
+dnl ----------------------------
+dnl Expand to the value of a "#define NAME" from the given FILE.
+dnl The regexps here aren't very rugged, but are enough for gmp.
+dnl /dev/null as a parameter prevents a hang if $2 is accidentally omitted.
+
+define(GMP_HEADER_GETVAL,
+[patsubst(patsubst(
+esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]),
+[^.*$1[ ]+],[]),
+[[
+ ]*$],[])])
+
+
+dnl GMP_VERSION
+dnl -----------
+dnl The gmp version number, extracted from the #defines in gmp.h.
+dnl Two digits like 3.0 if patchlevel <= 0, or three digits like 3.0.1 if
+dnl patchlevel > 0.
+
+define(GMP_VERSION,
+[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp.h)[]dnl
+.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp.h)[]dnl
+ifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h) > 0),1,
+[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h)])])
+
+
+dnl GMP_PROG_M4()
+dnl -------------
+dnl
+dnl Find a working m4, either in $PATH or likely locations, and setup $M4
+dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user
+dnl choice and is accepted with no checks. GMP_PROG_M4 is like
+dnl AC_PATH_PROG or AC_CHECK_PROG, but it tests each m4 found to see if
+dnl it's good enough.
+dnl
+dnl See mpn/asm-defs.m4 for details on the known bad m4s.
+
+AC_DEFUN(GMP_PROG_M4,
+[AC_CACHE_CHECK([for suitable m4],
+ gmp_cv_prog_m4,
+[if test -n "$M4"; then
+ gmp_cv_prog_m4="$M4"
+else
+ cat >conftest.m4 <<\EOF
+dnl must protect this against being expanded during autoconf m4!
+[define(dollarhash,``$][#'')dnl
+ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $][# not supported (SunOS /usr/bin/m4)
+'')dnl
+ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')dnl
+ifelse(t1`'t2,YY,`good
+')dnl]
+EOF
+ echo "trying m4" 1>&AC_FD_CC
+ gmp_tmp_val="`(m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="m4"
+ else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="$PATH:/usr/5bin"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ echo "trying $ac_dir/m4" 1>&AC_FD_CC
+ gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="$ac_dir/m4"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ if test -z "$gmp_cv_prog_m4"; then
+ AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).])
+ fi
+ fi
+ rm -f conftest.m4
+fi])
+M4="$gmp_cv_prog_m4"
+AC_SUBST(M4)
+])
+
+
+dnl GMP_PROG_CC_FIND([CC_LIST], [REQ_64BIT_CC])
+dnl Find first working compiler in CC_LIST.
+dnl If REQ_64BIT_CC is "yes", the compiler is required to be able to
+dnl produce 64-bit code.
+dnl NOTE: If a compiler needs any special flags for producing 64-bit code,
+dnl these have to be found in shell variable `gmp_cflags64_{cc}', where `{cc}'
+dnl is the name of the compiler.
+dnl Set CC to the name of the first working compiler.
+dnl If a 64-bit compiler is found, set CC64 to the name of the compiler and
+dnl CFLAGS64 to flags to use.
+dnl This macro does not test if any of the compilers found is a GNU compiler.
+dnl To do this, when you have finally made up your mind on which one to use,
+dnl and set CC accordingly, invoke [GMP_PROG_CC_SELECT]. That macro will
+dnl also make sure that your selection of CFLAGS is valid.
+dnl
+AC_DEFUN(GMP_PROG_CC_FIND,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+ifelse([$1], , gmp_cc_list="gcc cc", gmp_cc_list="[$1]")
+ifelse([$2], , gmp_req_64bit_cc="no", gmp_req_64bit_cc="[$2]")
+
+CC32=
+CC64=
+for c in $gmp_cc_list; do
+ # Avoid cache hits.
+ unset CC
+ unset ac_cv_prog_CC
+ AC_CHECK_TOOL(CC, $c, $c)
+ if test -n "$CC"; then
+ eval c_flags=\$gmp_cflags_$c
+ GMP_PROG_CC_WORKS($CC, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" != "yes"; then
+ continue
+ fi
+
+ # Save first working compiler, whether 32- or 64-bit capable.
+ if test -z "$CC32"; then
+ CC32="$CC"
+ fi
+ if test "$gmp_req_64bit_cc" = "yes"; then
+ eval c_flags=\$gmp_cflags64_$c
+
+ # Verify that the compiler works in 64-bit mode as well.
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link.
+ GMP_PROG_CC_WORKS($c, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" = "yes"; then
+ GMP_CHECK_CC_64BIT($c, $c_flags)
+ if test "$gmp_cv_cc_64bit" = "yes"; then
+ test -z "$CC64" && CC64="$c"
+ test -z "$CFLAGS64" && CFLAGS64="$c_flags"
+ # We have CC64 so we're done.
+ break
+ fi
+ fi
+ else
+ # We have CC32, and we don't need a 64-bit compiler so we're done.
+ break
+ fi
+ fi
+done
+CC="$CC32"
+])dnl
+
+dnl GMP_PROG_CC_SELECT
+dnl Check that `CC' works with `CFLAGS'. Check if `CC' is a GNU compiler.
+dnl Cache the result as `ac_cv_prog_CC'.
+AC_DEFUN(GMP_PROG_CC_SELECT,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+AC_PROG_CC_WORKS
+AC_PROG_CC_GNU
+
+if test "$ac_cv_prog_gcc" = "yes"; then
+ GCC=yes
+else
+ GCC=
+fi
+
+# Set CFLAGS if not already set.
+if test -z "$CFLAGS"; then
+ CFLAGS="-g"
+ if test "$GCC" = "yes"; then
+ CFLAGS="$CFLAGS -O2"
+ fi
+fi
+
+AC_SUBST(CC)
+AC_CACHE_VAL(ac_cv_prog_CC, ac_cv_prog_CC="$CC")
+AC_PROVIDE([AC_PROG_CC])
+])dnl
+
+dnl GMP_CHECK_CC_64BIT(cc, cflags64)
+dnl Find out if `CC' can produce 64-bit code.
+dnl Requires NM to be set to nm for target.
+dnl FIXME: Cache result.
+AC_DEFUN(GMP_CHECK_CC_64BIT,
+[
+ gmp_tmp_CC_save="$CC"
+ CC="[$1]"
+ AC_MSG_CHECKING([whether the C compiler ($CC) is 64-bit capable])
+ if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+ fi
+ gmp_tmp_CFLAGS_save="$CFLAGS"
+ CFLAGS="[$2]"
+
+ case "$target" in
+ hppa2.0*-*-*)
+ # FIXME: If gcc is installed under another name than "gcc", we will
+ # test the wrong thing.
+ if test "$CC" != "gcc"; then
+ dnl Let compiler version A.10.32.30 or higher be ok.
+ dnl Bad compiler output:
+ dnl ccom: HP92453-01 G.10.32.05 HP C Compiler
+ dnl Good compiler output:
+ dnl ccom: HP92453-01 A.10.32.30 HP C Compiler
+ echo >conftest.c
+ gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"`
+ rm conftest*
+ gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'`
+ gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+ gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+ gmp_cv_cc_64bit=no
+ test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \
+ && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \
+ && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \
+ && gmp_cv_cc_64bit=yes
+ else # gcc
+ # FIXME: Compile a minimal file and determine if the resulting object
+ # file is an ELF file. If so, gcc can produce 64-bit code.
+ # Do we have file(1) for target?
+ gmp_cv_cc_64bit=no
+ fi
+ ;;
+ mips-sgi-irix6.*)
+ # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit
+ # arithmetic but not 64-bit pointers, so the general test for sizeof
+ # (void *) is not valid.
+ # Simply try to compile an empty main. If that succeeds return
+ # true.
+ AC_TRY_COMPILE( , ,
+ gmp_cv_cc_64bit=yes, gmp_cv_cc_64bit=no,
+ gmp_cv_cc_64bit=no)
+ ;;
+ *-*-*)
+ # Allocate an array of size sizeof (void *) and use nm to determine its
+ # size. We depend on the first declared variable being put at address 0.
+ cat >conftest.c <<EOF
+[char arr[sizeof (void *)]={0};
+char post=0;]
+EOF
+ gmp_compile="$CC $CFLAGS -c conftest.c 1>&AC_FD_CC"
+ if AC_TRY_EVAL(gmp_compile); then
+ changequote(<,>)dnl
+ gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "8"; then
+ gmp_cv_cc_64bit=yes
+ else
+ gmp_cv_cc_64bit=no
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+ gmp_cv_cc_64bit=no
+ fi
+ rm -f conftest*
+ ;;
+ esac
+
+ CC="$gmp_tmp_CC_save"
+ CFLAGS="$gmp_tmp_CFLAGS_save"
+ AC_MSG_RESULT($gmp_cv_cc_64bit)
+])dnl
+
+dnl GMP_INIT([M4-DEF-FILE])
+dnl
+AC_DEFUN(GMP_INIT,
+[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]")
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
+test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
+test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p
+])dnl
+
+dnl GMP_FINISH
+dnl ----------
+dnl Create config.m4 from its accumulated parts.
+dnl
+dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include
+dnl of config.m4 is harmless.
+dnl
+dnl A separate ifdef on the angle bracket quoted part ensures the quoting
+dnl style there is respected. The basic defines from gmp_tmpconfigm4 are
+dnl fully quoted but are still put under an ifdef in case any have been
+dnl redefined by one of the m4 include files.
+dnl
+dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't
+dnl work, since it'd interpret parentheses and quotes in dnl comments, and
+dnl having a whole file as a macro argument would overflow the string space
+dnl on BSD m4.
+
+AC_DEFUN(GMP_FINISH,
+[AC_REQUIRE([GMP_INIT])
+echo "creating $gmp_configm4"
+echo ["dnl $gmp_configm4. Generated automatically by configure."] > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+ echo ["changequote(<,>)dnl"] >> $gmp_configm4
+ echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4
+ cat $gmp_tmpconfigm4 >> $gmp_configm4
+ echo [">)"] >> $gmp_configm4
+ echo ["changequote(\`,')dnl"] >> $gmp_configm4
+ rm $gmp_tmpconfigm4
+fi
+echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+ cat $gmp_tmpconfigm4i >> $gmp_configm4
+ rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+ cat $gmp_tmpconfigm4p >> $gmp_configm4
+ rm $gmp_tmpconfigm4p
+fi
+echo ["')"] >> $gmp_configm4
+echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4
+])dnl
+
+dnl GMP_INCLUDE(FILE)
+AC_DEFUN(GMP_INCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["include(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_SINCLUDE(FILE)
+AC_DEFUN(GMP_SINCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["sinclude(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION])
+dnl [ Define M4 macro MACRO as DEFINITION in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Mind the quoting! No shell variables will get expanded. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+dnl [ config.m4 uses `<' and '>' as quote characters for all defines. ]
+AC_DEFUN(GMP_DEFINE,
+[AC_REQUIRE([GMP_INIT])
+echo ['define(<$1>, <$2>)'] >> ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_DEFINE_RAW(STRING, [, LOCATION])
+dnl [ Put STRING in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+AC_DEFUN(GMP_DEFINE_RAW,
+[AC_REQUIRE([GMP_INIT])
+echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_CHECK_ASM_LABEL_SUFFIX
+dnl Should a label have a colon or not?
+AC_DEFUN(GMP_CHECK_ASM_LABEL_SUFFIX,
+[AC_CACHE_CHECK([what assembly label suffix to use],
+ gmp_cv_check_asm_label_suffix,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_label_suffix=[""] ;;
+ *) gmp_cv_check_asm_label_suffix=[":"] ;;
+esac
+])
+echo ["define(<LABEL_SUFFIX>, <\$][1$gmp_cv_check_asm_label_suffix>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_UNDERSCORE([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Shamelessly borrowed from glibc.
+AC_DEFUN(GMP_CHECK_ASM_UNDERSCORE,
+[AC_CACHE_CHECK([if symbols are prefixed by underscore],
+ gmp_cv_check_asm_underscore,
+[cat > conftest.$ac_ext <<EOF
+dnl This sometimes fails to find confdefs.h, for some reason.
+dnl [#]line __oline__ "[$]0"
+[#]line __oline__ "configure"
+#include "confdefs.h"
+int underscore_test() {
+return; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ if grep _underscore_test conftest* >/dev/null; then
+ gmp_cv_check_asm_underscore=yes
+ else
+ gmp_cv_check_asm_underscore=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_underscore" = "yes"; then
+ GMP_DEFINE(GSYM_PREFIX, [_])
+ ifelse([$1], , :, [$1])
+else
+ GMP_DEFINE(GSYM_PREFIX, [])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_ALIGN_LOG([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Is parameter to `.align' logarithmic?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_LOG,
+[AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([if .align assembly directive is logarithmic],
+ gmp_cv_check_asm_align_log,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ .align 4
+ $gmp_cv_check_asm_globl foo
+ .byte 1
+ .align 4
+foo$gmp_cv_check_asm_label_suffix
+ .byte 2
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+ gmp_cv_check_asm_align_log=yes
+ else
+ gmp_cv_check_asm_align_log=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)"])
+if test "$gmp_cv_check_asm_align_log" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+
+dnl GMP_CHECK_ASM_ALIGN_FILL_0x90
+dnl -----------------------------
+dnl Determine whether a ",0x90" suffix works on a .align directive.
+dnl This is only meant for use on x86, where 0x90 is a "nop".
+dnl
+dnl Old gas, eg. 1.92.3 - needs ",0x90" or else the fill is an invalid 0x00.
+dnl New gas, eg. 2.91 - generates the good multibyte nop fills even when
+dnl ",0x90" is given.
+dnl Solaris 2.6 as - doesn't allow ",0x90", gives a fatal error.
+dnl Solaris 2.8 as - gives a warning for ",0x90", no ill effect.
+dnl
+dnl Note that both solaris "as"s only care about ",0x90" if they actually
+dnl have to use it to fill something, hence the .byte in the sample. It's
+dnl only the second .align that provokes an error or warning.
+dnl
+dnl We prefer to suppress the warning from solaris 2.8 to stop anyone
+dnl worrying something might be wrong.
+
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_FILL_0x90,
+[AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text],
+ gmp_cv_check_asm_align_fill_0x90,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ .align 4, 0x90
+ .byte 0
+ .align 4, 0x90
+EOF
+gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`"
+if test $? = 0; then
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then
+ echo "Supressing this warning by omitting 0x90" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+ else
+ gmp_cv_check_asm_align_fill_0x90=yes
+ fi
+else
+ echo "Non-zero exit code" 1>&AC_FD_CC
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(
+["define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)"])
+])
+
+
+dnl GMP_CHECK_ASM_TEXT
+AC_DEFUN(GMP_CHECK_ASM_TEXT,
+[AC_CACHE_CHECK([how to switch to text section], gmp_cv_check_asm_text,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_text={".csect .text[PR]"}
+ changequote([, ])
+ ;;
+ *-*-hpux*) gmp_cv_check_asm_text=[".code"] ;;
+ *) gmp_cv_check_asm_text=[".text"] ;;
+esac
+])
+echo ["define(<TEXT>, <$gmp_cv_check_asm_text>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_DATA
+dnl Can we say `.data'?
+AC_DEFUN(GMP_CHECK_ASM_DATA,
+[AC_CACHE_CHECK([how to switch to data section], gmp_cv_check_asm_data,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_data={".csect .data[RW]"}
+ changequote([, ])
+ ;;
+ *) gmp_cv_check_asm_data=[".data"] ;;
+esac
+])
+echo ["define(<DATA>, <$gmp_cv_check_asm_data>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_GLOBL
+dnl Can we say `.global'?
+AC_DEFUN(GMP_CHECK_ASM_GLOBL,
+[AC_CACHE_CHECK([how to export a symbol], gmp_cv_check_asm_globl,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_globl=[".export"] ;;
+ *) gmp_cv_check_asm_globl=[".globl"] ;;
+esac
+])
+echo ["define(<GLOBL>, <$gmp_cv_check_asm_globl>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_TYPE
+dnl Can we say `.type'?
+AC_DEFUN(GMP_CHECK_ASM_TYPE,
+[AC_CACHE_CHECK([how the .type assembly directive should be used],
+gmp_cv_check_asm_type,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+for gmp_tmp_prefix in @ \# %; do
+ echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+ if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_type="[.type \$][1,${gmp_tmp_prefix}\$][2]"
+ break
+ fi
+done
+if test -z "$gmp_cv_check_asm_type"; then
+ gmp_cv_check_asm_type="[dnl]"
+fi
+])
+echo ["define(<TYPE>, <$gmp_cv_check_asm_type>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_SIZE
+dnl Can we say `.size'?
+AC_DEFUN(GMP_CHECK_ASM_SIZE,
+[AC_CACHE_CHECK([if the .size assembly directive works], gmp_cv_check_asm_size,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+echo ' .size sym,1' > conftest.s
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_size="[.size \$][1,\$][2]"
+else
+ gmp_cv_check_asm_size="[dnl]"
+fi
+])
+echo ["define(<SIZE>, <$gmp_cv_check_asm_size>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_LSYM_PREFIX
+dnl What is the prefix for a local label?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_LSYM_PREFIX,
+[AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([what prefix to use for a local label],
+gmp_cv_check_asm_lsym_prefix,
+[if test -z "$NM"; then
+ echo; echo ["$0: fatal: need nm"]
+ exit 1
+fi
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+gmp_cv_check_asm_lsym_prefix="L"
+for gmp_tmp_pre in L .L $ L$; do
+ cat > conftest.s <<EOF
+dummy${gmp_cv_check_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ $NM conftest.o >/dev/null 2>&1
+ gmp_rc=$?
+ if test "$gmp_rc" != "0"; then
+ echo "configure: $NM failure, using default"
+ break
+ fi
+ if $NM conftest.o | grep gurkmacka >/dev/null; then true; else
+ gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre"
+ break
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+ # Use default.
+ fi
+done
+rm -f conftest*
+])
+echo ["define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_W32
+dnl How to [define] a 32-bit word.
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_W32,
+[AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([how to [define] a 32-bit word],
+ gmp_cv_check_asm_w32,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+
+# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+# that it's at 0x0. We'll have to declare another symbol before the
+# .long/.word and look at the distance between the two symbols. The
+# only problem is that the sed expression(s) barfs (on Solaris, for
+# example) for the symbol with value 0. For now, HPUX uses .word.
+
+case "$target" in
+ *-*-hpux*)
+ gmp_cv_check_asm_w32=".word"
+ ;;
+ *-*-*)
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+ for gmp_tmp_op in .long .word; do
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ $gmp_cv_check_asm_globl foo
+ $gmp_tmp_op 0
+foo${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "4"; then
+ gmp_cv_check_asm_w32="$gmp_tmp_op"
+ break
+ fi
+ fi
+ done
+ ;;
+esac
+
+if test -z "$gmp_cv_check_asm_w32"; then
+ echo; echo ["configure: $0: fatal: do not know how to define a 32-bit word"]
+ exit 1
+fi
+rm -f conftest*
+])
+echo ["define(<W32>, <$gmp_cv_check_asm_w32>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_MMX([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+dnl Can we assemble MMX insns?
+AC_DEFUN(GMP_CHECK_ASM_MMX,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler knows about MMX instructions],
+ gmp_cv_check_asm_mmx,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ por %mm0, %mm0
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_mmx=yes
+else
+ gmp_cv_check_asm_mmx=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_mmx" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ AC_MSG_WARN([+----------------------------------------------------------])
+ AC_MSG_WARN([| WARNING WARNING WARNING])
+ AC_MSG_WARN([| Target CPU has MMX code, but it can't be assembled by])
+ AC_MSG_WARN([| $CCAS $CFLAGS])
+ AC_MSG_WARN([| Non-MMX replacements will be used.])
+ AC_MSG_WARN([| This will be an inferior build.])
+ AC_MSG_WARN([+----------------------------------------------------------])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_SHLDL_CL([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+AC_DEFUN(GMP_CHECK_ASM_SHLDL_CL,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler takes cl with shldl],
+ gmp_cv_check_asm_shldl_cl,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ shldl %cl, %eax, %ebx
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_shldl_cl=yes
+else
+ gmp_cv_check_asm_shldl_cl=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_PROG_CC_WORKS(CC, CFLAGS, ACTION-IF-WORKS, [ACTION-IF-NOT-WORKS])
+dnl Check if CC can compile and link. Perform various target specific tests.
+dnl FIXME: Require `$target'.
+AC_DEFUN(GMP_PROG_CC_WORKS,
+[AC_LANG_C dnl Note: Destructive.
+CC="[$1]"
+CFLAGS="[$2]"
+AC_MSG_CHECKING([if the C compiler ($CC) works with flags $CFLAGS])
+
+# Simple test for all targets.
+AC_TRY_COMPILER([int main(){return(0);}],
+ tmp_works, tmp_cross)
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ AC_TRY_COMPILE( , [} void *g(); void *f() { return g(); } int bar(){],
+ tmp_works=yes, tmp_works=no)
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ [$3]
+else
+ ifelse([$4], , :, [$4])
+fi
+
+AC_MSG_RESULT($tmp_works)
+])dnl
+
+
+dnl GMP_C_ANSI2KNR
+dnl --------------
+dnl Setup to use ansi2knr if necessary.
+dnl
+dnl The test here is simply that if an ANSI style function works then
+dnl ansi2knr isn't needed. The normal tests for whether $CC works mean we
+dnl don't need to worry here about anything badly broken.
+dnl
+dnl AM_C_PROTOTYPES is the normal way to set up ansi2knr, but (in automake
+dnl March 2000) it gives the wrong answer on a C++ compiler because its
+dnl test requires that the compiler accept both ANSI and K&R, or otherwise
+dnl ansi2knr is used. A C++ compiler fails on the K&R part, which makes
+dnl AM_C_PROTOTYPES think it needs ansi2knr! GMP has no bare K&R so we
+dnl only need ANSI or K&R to work, not both.
+
+AC_DEFUN(GMP_C_ANSI2KNR,
+[AC_CACHE_CHECK([if ansi2knr should be used],
+ gmp_cv_c_ansi2knr,
+[cat >conftest.c <<EOF
+int main (int argc, char *argv[]) { return 0; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ gmp_cv_c_ansi2knr=no
+else
+ gmp_cv_c_ansi2knr=yes
+fi
+rm -f conftest.*
+])
+if test $gmp_cv_c_ansi2knr = no; then
+ U= ANSI2KNR=
+else
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+ AC_HEADER_STDC
+ AC_CHECK_HEADERS(string.h)
+fi
+AC_SUBST(U)
+AC_SUBST(ANSI2KNR)
+])
+
+
+dnl Deal with bad synchronization of Autoconf with Libtool.
+AC_DEFUN(AC_CANONICAL_BUILD, [_AC_CANONICAL_BUILD])
+AC_DEFUN(AC_CHECK_TOOL_PREFIX, [_AC_CHECK_TOOL_PREFIX])
+
+
+# serial 1
+
+AC_DEFUN(AM_C_PROTOTYPES,
+[AC_REQUIRE([AM_PROG_CC_STDC])
+AC_REQUIRE([AC_PROG_CPP])
+AC_MSG_CHECKING([for function prototypes])
+if test "$am_cv_prog_cc_stdc" != no; then
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(PROTOTYPES,1,[Define if compiler has function prototypes])
+ U= ANSI2KNR=
+else
+ AC_MSG_RESULT(no)
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+ AC_HEADER_STDC
+ AC_CHECK_HEADERS(string.h)
+fi
+AC_SUBST(U)dnl
+AC_SUBST(ANSI2KNR)dnl
+])
+
+
+# serial 1
+
+# @defmac AC_PROG_CC_STDC
+# @maindex PROG_CC_STDC
+# @ovindex CC
+# If the C compiler in not in ANSI C mode by default, try to add an option
+# to output variable @code{CC} to make it so. This macro tries various
+# options that select ANSI C on some system or another. It considers the
+# compiler to be in ANSI C mode if it handles function prototypes correctly.
+#
+# If you use this macro, you should check after calling it whether the C
+# compiler has been set to accept ANSI C; if not, the shell variable
+# @code{am_cv_prog_cc_stdc} is set to @samp{no}. If you wrote your source
+# code in ANSI C, you can make an un-ANSIfied copy of it by using the
+# program @code{ansi2knr}, which comes with Ghostscript.
+# @end defmac
+
+AC_DEFUN(AM_PROG_CC_STDC,
+[AC_REQUIRE([AC_PROG_CC])
+AC_BEFORE([$0], [AC_C_INLINE])
+AC_BEFORE([$0], [AC_C_CONST])
+dnl Force this before AC_PROG_CPP. Some cpp's, eg on HPUX, require
+dnl a magic option to avoid problems with ANSI preprocessor commands
+dnl like #elif.
+dnl FIXME: can't do this because then AC_AIX won't work due to a
+dnl circular dependency.
+dnl AC_BEFORE([$0], [AC_PROG_CPP])
+AC_MSG_CHECKING(for ${CC-cc} option to accept ANSI C)
+AC_CACHE_VAL(am_cv_prog_cc_stdc,
+[am_cv_prog_cc_stdc=no
+ac_save_CC="$CC"
+# Don't try gcc -ansi; that turns off useful extensions and
+# breaks some systems' header files.
+# AIX -qlanglvl=ansi
+# Ultrix and OSF/1 -std1
+# HP-UX 10.20 and later -Ae
+# HP-UX older versions -Aa -D_HPUX_SOURCE
+# SVR4 -Xc -D__EXTENSIONS__
+for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ AC_TRY_COMPILE(
+[#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+], [
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+],
+[am_cv_prog_cc_stdc="$ac_arg"; break])
+done
+CC="$ac_save_CC"
+])
+if test -z "$am_cv_prog_cc_stdc"; then
+ AC_MSG_RESULT([none needed])
+else
+ AC_MSG_RESULT($am_cv_prog_cc_stdc)
+fi
+case "x$am_cv_prog_cc_stdc" in
+ x|xno) ;;
+ *) CC="$CC $am_cv_prog_cc_stdc" ;;
+esac
+])
+
+# Do all the work for Automake. This macro actually does too much --
+# some checks are only needed if your package does certain things.
+# But this isn't really a big deal.
+
+# serial 1
+
+dnl Usage:
+dnl AM_INIT_AUTOMAKE(package,version, [no-define])
+
+AC_DEFUN(AM_INIT_AUTOMAKE,
+[AC_REQUIRE([AC_PROG_INSTALL])
+dnl We require 2.13 because we rely on SHELL being computed by configure.
+AC_PREREQ([2.13])
+PACKAGE=[$1]
+AC_SUBST(PACKAGE)
+VERSION=[$2]
+AC_SUBST(VERSION)
+dnl test to see if srcdir already configured
+if test "`CDPATH=: && cd $srcdir && pwd`" != "`pwd`" &&
+ test -f $srcdir/config.status; then
+ AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+fi
+ifelse([$3],,
+AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
+AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package]))
+AC_REQUIRE([AM_SANITY_CHECK])
+AC_REQUIRE([AC_ARG_PROGRAM])
+AM_MISSING_PROG(ACLOCAL, aclocal)
+AM_MISSING_PROG(AUTOCONF, autoconf)
+AM_MISSING_PROG(AUTOMAKE, automake)
+AM_MISSING_PROG(AUTOHEADER, autoheader)
+AM_MISSING_PROG(MAKEINFO, makeinfo)
+AM_MISSING_PROG(AMTAR, tar)
+AM_MISSING_INSTALL_SH
+dnl We need awk for the "check" target. The system "awk" is bad on
+dnl some platforms.
+AC_REQUIRE([AC_PROG_AWK])
+AC_REQUIRE([AC_PROG_MAKE_SET])
+AC_REQUIRE([AM_DEP_TRACK])
+AC_REQUIRE([AM_SET_DEPDIR])
+ifdef([AC_PROVIDE_AC_PROG_CC], [AM_DEPENDENCIES(CC)], [
+ define([AC_PROG_CC], defn([AC_PROG_CC])[AM_DEPENDENCIES(CC)])])
+ifdef([AC_PROVIDE_AC_PROG_CXX], [AM_DEPENDENCIES(CXX)], [
+ define([AC_PROG_CXX], defn([AC_PROG_CXX])[AM_DEPENDENCIES(CXX)])])
+])
+
+#
+# Check to make sure that the build environment is sane.
+#
+
+AC_DEFUN(AM_SANITY_CHECK,
+[AC_MSG_CHECKING([whether build environment is sane])
+# Just in case
+sleep 1
+echo timestamp > conftestfile
+# Do `set' in a subshell so we don't clobber the current shell's
+# arguments. Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+ set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null`
+ if test "[$]*" = "X"; then
+ # -L didn't work.
+ set X `ls -t $srcdir/configure conftestfile`
+ fi
+ if test "[$]*" != "X $srcdir/configure conftestfile" \
+ && test "[$]*" != "X conftestfile $srcdir/configure"; then
+
+ # If neither matched, then we have a broken ls. This can happen
+ # if, for instance, CONFIG_SHELL is bash and it inherits a
+ # broken ls alias from the environment. This has actually
+ # happened. Such a system could not be considered "sane".
+ AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
+alias in your environment])
+ fi
+
+ test "[$]2" = conftestfile
+ )
+then
+ # Ok.
+ :
+else
+ AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+rm -f conftest*
+AC_MSG_RESULT(yes)])
+
+dnl AM_MISSING_PROG(NAME, PROGRAM)
+AC_DEFUN(AM_MISSING_PROG, [
+AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+dnl Like AM_MISSING_PROG, but only looks for install-sh.
+dnl AM_MISSING_INSTALL_SH()
+AC_DEFUN(AM_MISSING_INSTALL_SH, [
+AC_REQUIRE([AM_MISSING_HAS_RUN])
+if test -z "$install_sh"; then
+ install_sh="$ac_aux_dir/install-sh"
+ test -f "$install_sh" || install_sh="$ac_aux_dir/install.sh"
+ test -f "$install_sh" || install_sh="${am_missing_run}${ac_auxdir}/install-sh"
+ dnl FIXME: an evil hack: we remove the SHELL invocation from
+ dnl install_sh because automake adds it back in. Sigh.
+ install_sh="`echo $install_sh | sed -e 's/\${SHELL}//'`"
+fi
+AC_SUBST(install_sh)])
+
+dnl AM_MISSING_HAS_RUN.
+dnl Define MISSING if not defined so far and test if it supports --run.
+dnl If it does, set am_missing_run to use it, otherwise, to nothing.
+AC_DEFUN([AM_MISSING_HAS_RUN], [
+test x"${MISSING+set}" = xset || \
+ MISSING="\${SHELL} `CDPATH=: && cd $ac_aux_dir && pwd`/missing"
+dnl Use eval to expand $SHELL
+if eval "$MISSING --run :"; then
+ am_missing_run="$MISSING --run "
+else
+ am_missing_run=
+ am_backtick='`'
+ AC_MSG_WARN([${am_backtick}missing' script is too old or missing])
+fi
+])
+
+dnl See how the compiler implements dependency checking.
+dnl Usage:
+dnl AM_DEPENDENCIES(NAME)
+dnl NAME is "CC", "CXX" or "OBJC".
+
+dnl We try a few techniques and use that to set a single cache variable.
+
+AC_DEFUN(AM_DEPENDENCIES,[
+AC_REQUIRE([AM_SET_DEPDIR])
+AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])
+ifelse([$1],CC,[
+AC_REQUIRE([AC_PROG_CC])
+AC_REQUIRE([AC_PROG_CPP])
+depcc="$CC"
+depcpp="$CPP"],[$1],CXX,[
+AC_REQUIRE([AC_PROG_CXX])
+AC_REQUIRE([AC_PROG_CXXCPP])
+depcc="$CXX"
+depcpp="$CXXCPP"],[$1],OBJC,[
+am_cv_OBJC_dependencies_compiler_type=gcc],[
+AC_REQUIRE([AC_PROG_][$1])
+depcc="$[$1]"
+depcpp=""])
+AC_MSG_CHECKING([dependency style of $depcc])
+AC_CACHE_VAL(am_cv_[$1]_dependencies_compiler_type,[
+if test -z "$AMDEP"; then
+ echo '#include "conftest.h"' > conftest.c
+ echo 'int i;' > conftest.h
+
+ am_cv_[$1]_dependencies_compiler_type=none
+ for depmode in `sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < "$am_depcomp"`; do
+ case "$depmode" in
+ nosideeffect)
+ # after this tag, mechanisms are not by side-effect, so they'll
+ # only be used when explicitly requested
+ if test "x$enable_dependency_tracking" = xyes; then
+ continue
+ else
+ break
+ fi
+ ;;
+ none) break ;;
+ esac
+ if depmode="$depmode" \
+ source=conftest.c object=conftest.o \
+ depfile=conftest.Po tmpdepfile=conftest.TPo \
+ $SHELL $am_depcomp $depcc -c conftest.c 2>/dev/null &&
+ grep conftest.h conftest.Po > /dev/null 2>&1; then
+ am_cv_[$1]_dependencies_compiler_type="$depmode"
+ break
+ fi
+ done
+
+ rm -f conftest.*
+else
+ am_cv_[$1]_dependencies_compiler_type=none
+fi
+])
+AC_MSG_RESULT($am_cv_[$1]_dependencies_compiler_type)
+[$1]DEPMODE="depmode=$am_cv_[$1]_dependencies_compiler_type"
+AC_SUBST([$1]DEPMODE)
+])
+
+dnl Choose a directory name for dependency files.
+dnl This macro is AC_REQUIREd in AM_DEPENDENCIES
+
+AC_DEFUN(AM_SET_DEPDIR,[
+if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then
+ DEPDIR=.deps
+else
+ DEPDIR=_deps
+fi
+AC_SUBST(DEPDIR)
+])
+
+AC_DEFUN(AM_DEP_TRACK,[
+AC_ARG_ENABLE(dependency-tracking,
+[ --disable-dependency-tracking Speeds up one-time builds
+ --enable-dependency-tracking Do not reject slow dependency extractors])
+if test "x$enable_dependency_tracking" = xno; then
+ AMDEP="#"
+else
+ am_depcomp="$ac_aux_dir/depcomp"
+ if test ! -f "$am_depcomp"; then
+ AMDEP="#"
+ else
+ AMDEP=
+ fi
+fi
+AC_SUBST(AMDEP)
+if test -z "$AMDEP"; then
+ AMDEPBACKSLASH='\'
+else
+ AMDEPBACKSLASH=
+fi
+pushdef([subst], defn([AC_SUBST]))
+subst(AMDEPBACKSLASH)
+popdef([subst])
+])
+
+dnl Generate code to set up dependency tracking.
+dnl This macro should only be invoked once -- use via AC_REQUIRE.
+dnl Usage:
+dnl AM_OUTPUT_DEPENDENCY_COMMANDS
+
+dnl
+dnl This code is only required when automatic dependency tracking
+dnl is enabled. FIXME. This creates each `.P' file that we will
+dnl need in order to bootstrap the dependency handling code.
+AC_DEFUN(AM_OUTPUT_DEPENDENCY_COMMANDS,[
+AC_OUTPUT_COMMANDS([
+test x"$AMDEP" != x"" ||
+for mf in $CONFIG_FILES; do
+ case "$mf" in
+ Makefile) dirpart=.;;
+ */Makefile) dirpart=`echo "$mf" | sed -e 's|/[^/]*$||'`;;
+ *) continue;;
+ esac
+ grep '^DEP_FILES *= *[^ #]' < "$mf" > /dev/null || continue
+ # Extract the definition of DEP_FILES from the Makefile without
+ # running `make'.
+ DEPDIR=`sed -n -e '/^DEPDIR = / s///p' < "$mf"`
+ test -z "$DEPDIR" && continue
+ # When using ansi2knr, U may be empty or an underscore; expand it
+ U=`sed -n -e '/^U = / s///p' < "$mf"`
+ test -d "$dirpart/$DEPDIR" || mkdir "$dirpart/$DEPDIR"
+ # We invoke sed twice because it is the simplest approach to
+ # changing $(DEPDIR) to its actual value in the expansion.
+ for file in `sed -n -e '
+ /^DEP_FILES = .*\\\\$/ {
+ s/^DEP_FILES = //
+ :loop
+ s/\\\\$//
+ p
+ n
+ /\\\\$/ b loop
+ p
+ }
+ /^DEP_FILES = / s/^DEP_FILES = //p' < "$mf" | \
+ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
+ # Make sure the directory exists.
+ test -f "$dirpart/$file" && continue
+ fdir=`echo "$file" | sed -e 's|/[^/]*$||'`
+ $ac_aux_dir/mkinstalldirs "$dirpart/$fdir" > /dev/null 2>&1
+ # echo "creating $dirpart/$file"
+ echo '# dummy' > "$dirpart/$file"
+ done
+done
+], [AMDEP="$AMDEP"
+ac_aux_dir="$ac_aux_dir"])])
+
+# Like AC_CONFIG_HEADER, but automatically create stamp file.
+
+AC_DEFUN(AM_CONFIG_HEADER,
+[AC_PREREQ([2.12])
+AC_CONFIG_HEADER([$1])
+dnl When config.status generates a header, we must update the stamp-h file.
+dnl This file resides in the same directory as the config header
+dnl that is generated. We must strip everything past the first ":",
+dnl and everything past the last "/".
+AC_OUTPUT_COMMANDS(changequote(<<,>>)dnl
+ifelse(patsubst(<<$1>>, <<[^ ]>>, <<>>), <<>>,
+<<test -z "<<$>>CONFIG_HEADERS" || echo timestamp > patsubst(<<$1>>, <<^\([^:]*/\)?.*>>, <<\1>>)stamp-h<<>>dnl>>,
+<<am_indx=1
+for am_file in <<$1>>; do
+ case " <<$>>CONFIG_HEADERS " in
+ *" <<$>>am_file "*<<)>>
+ echo timestamp > `echo <<$>>am_file | sed -e 's%:.*%%' -e 's%[^/]*$%%'`stamp-h$am_indx
+ ;;
+ esac
+ am_indx=`expr "<<$>>am_indx" + 1`
+done<<>>dnl>>)
+changequote([,]))])
+
+# Add --enable-maintainer-mode option to configure.
+# From Jim Meyering
+
+# serial 1
+
+AC_DEFUN(AM_MAINTAINER_MODE,
+[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
+ dnl maintainer-mode is disabled by default
+ AC_ARG_ENABLE(maintainer-mode,
+[ --enable-maintainer-mode enable make rules and dependencies not useful
+ (and sometimes confusing) to the casual installer],
+ USE_MAINTAINER_MODE=$enableval,
+ USE_MAINTAINER_MODE=no)
+ AC_MSG_RESULT($USE_MAINTAINER_MODE)
+ AM_CONDITIONAL(MAINTAINER_MODE, test $USE_MAINTAINER_MODE = yes)
+ MAINT=$MAINTAINER_MODE_TRUE
+ AC_SUBST(MAINT)dnl
+]
+)
+
+# Define a conditional.
+
+AC_DEFUN(AM_CONDITIONAL,
+[AC_SUBST($1_TRUE)
+AC_SUBST($1_FALSE)
+if $2; then
+ $1_TRUE=
+ $1_FALSE='#'
+else
+ $1_TRUE='#'
+ $1_FALSE=
+fi])
+
+
+# serial 42 AC_PROG_LIBTOOL
+AC_DEFUN(AC_PROG_LIBTOOL,
+[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl
+
+# Save cache, so that ltconfig can load it
+AC_CACHE_SAVE
+
+# Actually configure libtool. ac_aux_dir is where install-sh is found.
+AR="$AR" CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" \
+MAGIC="$MAGIC" LD="$LD" LDFLAGS="$LDFLAGS" LIBS="$LIBS" \
+LN_S="$LN_S" NM="$NM" RANLIB="$RANLIB" STRIP="$STRIP" \
+AS="$AS" DLLTOOL="$DLLTOOL" OBJDUMP="$OBJDUMP" \
+objext="$OBJEXT" exeext="$EXEEXT" reload_flag="$reload_flag" \
+deplibs_check_method="$deplibs_check_method" file_magic_cmd="$file_magic_cmd" \
+${CONFIG_SHELL-/bin/sh} $ac_aux_dir/ltconfig --no-reexec \
+$libtool_flags --no-verify --build="$build" $ac_aux_dir/ltmain.sh $lt_target \
+|| AC_MSG_ERROR([libtool configure failed])
+
+# Reload cache, that may have been modified by ltconfig
+AC_CACHE_LOAD
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltconfig $ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+AC_SUBST(LIBTOOL)dnl
+
+# Redirect the config.log output again, so that the ltconfig log is not
+# clobbered by the next message.
+exec 5>>./config.log
+])
+
+AC_DEFUN(AC_LIBTOOL_SETUP,
+[AC_PREREQ(2.13)dnl
+AC_REQUIRE([AC_ENABLE_SHARED])dnl
+AC_REQUIRE([AC_ENABLE_STATIC])dnl
+AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_LD])dnl
+AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl
+AC_REQUIRE([AC_PROG_NM])dnl
+AC_REQUIRE([AC_PROG_LN_S])dnl
+AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl
+AC_REQUIRE([AC_OBJEXT])dnl
+AC_REQUIRE([AC_EXEEXT])dnl
+dnl
+
+# Only perform the check for file, if the check method requires it
+case "$deplibs_check_method" in
+file_magic*)
+ if test "$file_magic_cmd" = '${MAGIC}'; then
+ AC_PATH_MAGIC
+ fi
+ ;;
+esac
+
+case "$target" in
+NONE) lt_target="$host" ;;
+*) lt_target="$target" ;;
+esac
+
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+AC_CHECK_TOOL(STRIP, strip, :)
+
+# Check for any special flags to pass to ltconfig.
+libtool_flags="--cache-file=$cache_file"
+test "$enable_shared" = no && libtool_flags="$libtool_flags --disable-shared"
+test "$enable_static" = no && libtool_flags="$libtool_flags --disable-static"
+test "$enable_fast_install" = no && libtool_flags="$libtool_flags --disable-fast-install"
+test "$ac_cv_prog_gcc" = yes && libtool_flags="$libtool_flags --with-gcc"
+test "$ac_cv_prog_gnu_ld" = yes && libtool_flags="$libtool_flags --with-gnu-ld"
+ifdef([AC_PROVIDE_AC_LIBTOOL_DLOPEN],
+[libtool_flags="$libtool_flags --enable-dlopen"])
+ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL],
+[libtool_flags="$libtool_flags --enable-win32-dll"])
+AC_ARG_ENABLE(libtool-lock,
+ [ --disable-libtool-lock avoid locking (might break parallel builds)])
+test "x$enable_libtool_lock" = xno && libtool_flags="$libtool_flags --disable-lock"
+test x"$silent" = xyes && libtool_flags="$libtool_flags --silent"
+
+AC_ARG_WITH(pic,
+ [ --with-pic try to use only PIC/non-PIC objects [default=use both]],
+ pic_mode="$withval", pic_mode=default)
+test x"$pic_mode" = xyes && libtool_flags="$libtool_flags --prefer-pic"
+test x"$pic_mode" = xno && libtool_flags="$libtool_flags --prefer-non-pic"
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case "$lt_target" in
+*-*-irix6*)
+ # Find out which ABI we are using.
+ echo '[#]line __oline__ "configure"' > conftest.$ac_ext
+ if AC_TRY_EVAL(ac_compile); then
+ case "`/usr/bin/file conftest.o`" in
+ *32-bit*)
+ LD="${LD-ld} -32"
+ ;;
+ *N32*)
+ LD="${LD-ld} -n32"
+ ;;
+ *64-bit*)
+ LD="${LD-ld} -64"
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+
+*-*-sco3.2v5*)
+ # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -belf"
+ AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+ [AC_LANG_SAVE
+ AC_LANG_C
+ AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+ AC_LANG_RESTORE])
+ if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+ # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+ CFLAGS="$SAVE_CFLAGS"
+ fi
+ ;;
+
+ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL],
+[*-*-cygwin* | *-*-mingw*)
+ AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+ AC_CHECK_TOOL(AS, as, false)
+ AC_CHECK_TOOL(OBJDUMP, objdump, false)
+
+ # recent cygwin and mingw systems supply a stub DllMain which the user
+ # can override, but on older systems we have to supply one
+ AC_CACHE_CHECK([if libtool should supply DllMain function], lt_cv_need_dllmain,
+ [AC_TRY_LINK([],
+ [extern int __attribute__((__stdcall__)) DllMain(void*, int, void*);
+ DllMain (0, 0, 0);],
+ [lt_cv_need_dllmain=no],[lt_cv_need_dllmain=yes])])
+
+ case "$lt_target/$CC" in
+ *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*)
+ # old mingw systems require "-dll" to link a DLL, while more recent ones
+ # require "-mdll"
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -mdll"
+ AC_CACHE_CHECK([how to link DLLs], lt_cv_cc_dll_switch,
+ [AC_TRY_LINK([], [], [lt_cv_cc_dll_switch=-mdll],[lt_cv_cc_dll_switch=-dll])])
+ CFLAGS="$SAVE_CFLAGS" ;;
+ *-*-cygwin*)
+ # cygwin systems need to pass --dll to the linker, and not link
+ # crt.o which will require a WinMain@16 definition.
+ lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;;
+ esac
+ ;;
+ ])
+esac
+])
+
+# AC_LIBTOOL_DLOPEN - enable checks for dlopen support
+AC_DEFUN(AC_LIBTOOL_DLOPEN, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])])
+
+# AC_LIBTOOL_WIN32_DLL - declare package support for building win32 dll's
+AC_DEFUN(AC_LIBTOOL_WIN32_DLL, [AC_BEFORE([$0], [AC_LIBTOOL_SETUP])])
+
+# AC_ENABLE_SHARED - implement the --enable-shared flag
+# Usage: AC_ENABLE_SHARED[(DEFAULT)]
+# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to
+# `yes'.
+AC_DEFUN(AC_ENABLE_SHARED, [dnl
+define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(shared,
+changequote(<<, >>)dnl
+<< --enable-shared[=PKGS] build shared libraries [default=>>AC_ENABLE_SHARED_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_shared=yes ;;
+no) enable_shared=no ;;
+*)
+ enable_shared=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_shared=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac],
+enable_shared=AC_ENABLE_SHARED_DEFAULT)dnl
+])
+
+# AC_DISABLE_SHARED - set the default shared flag to --disable-shared
+AC_DEFUN(AC_DISABLE_SHARED, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_SHARED(no)])
+
+# AC_ENABLE_STATIC - implement the --enable-static flag
+# Usage: AC_ENABLE_STATIC[(DEFAULT)]
+# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to
+# `yes'.
+AC_DEFUN(AC_ENABLE_STATIC, [dnl
+define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(static,
+changequote(<<, >>)dnl
+<< --enable-static[=PKGS] build static libraries [default=>>AC_ENABLE_STATIC_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_static=yes ;;
+no) enable_static=no ;;
+*)
+ enable_static=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_static=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac],
+enable_static=AC_ENABLE_STATIC_DEFAULT)dnl
+])
+
+# AC_DISABLE_STATIC - set the default static flag to --disable-static
+AC_DEFUN(AC_DISABLE_STATIC, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_STATIC(no)])
+
+
+# AC_ENABLE_FAST_INSTALL - implement the --enable-fast-install flag
+# Usage: AC_ENABLE_FAST_INSTALL[(DEFAULT)]
+# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to
+# `yes'.
+AC_DEFUN(AC_ENABLE_FAST_INSTALL, [dnl
+define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(fast-install,
+changequote(<<, >>)dnl
+<< --enable-fast-install[=PKGS] optimize for fast installation [default=>>AC_ENABLE_FAST_INSTALL_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_fast_install=yes ;;
+no) enable_fast_install=no ;;
+*)
+ enable_fast_install=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_fast_install=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac],
+enable_fast_install=AC_ENABLE_FAST_INSTALL_DEFAULT)dnl
+])
+
+# AC_ENABLE_FAST_INSTALL - set the default to --disable-fast-install
+AC_DEFUN(AC_DISABLE_FAST_INSTALL, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_FAST_INSTALL(no)])
+
+
+# AC_PATH_TOOL_PREFIX - find a file program which can recognise shared library
+AC_DEFUN(AC_PATH_TOOL_PREFIX,
+[AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC,
+[case "$MAGIC" in
+ /*)
+ lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path.
+ ;;
+ ?:/*)
+ ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path.
+ ;;
+ *)
+ ac_save_MAGIC="$MAGIC"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="ifelse([$2], , $PATH, [$2])"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$1; then
+ lt_cv_path_MAGIC="$ac_dir/$1"
+ if test -n "$file_magic_test_file"; then
+ case "$deplibs_check_method" in
+ "file_magic "*)
+ file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+ MAGIC="$lt_cv_path_MAGIC"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ egrep "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ MAGIC="$ac_save_MAGIC"
+ ;;
+esac])
+MAGIC="$lt_cv_path_MAGIC"
+if test -n "$MAGIC"; then
+ AC_MSG_RESULT($MAGIC)
+else
+ AC_MSG_RESULT(no)
+fi
+])
+
+
+# AC_PATH_MAGIC - find a file program which can recognise a shared library
+AC_DEFUN(AC_PATH_MAGIC,
+[AC_REQUIRE([AC_CHECK_TOOL_PREFIX])dnl
+AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin:$PATH)
+if test -z "$lt_cv_path_MAGIC"; then
+ if test -n "$ac_tool_prefix"; then
+ AC_PATH_TOOL_PREFIX(file, /usr/bin:$PATH)
+ else
+ MAGIC=:
+ fi
+fi
+])
+
+
+# AC_PROG_LD - find the path to the GNU or non-GNU linker
+AC_DEFUN(AC_PROG_LD,
+[AC_ARG_WITH(gnu-ld,
+[ --with-gnu-ld assume the C compiler uses GNU ld [default=no]],
+test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no)
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+ac_prog=ld
+if test "$ac_cv_prog_gcc" = yes; then
+ # Check if gcc -print-prog-name=ld gives a path.
+ AC_MSG_CHECKING([for ld used by GCC])
+ case $lt_target in
+ *-*-mingw*)
+ # gcc leaves a trailing carriage return which upsets mingw
+ ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+ *)
+ ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+ esac
+ case "$ac_prog" in
+ # Accept absolute paths.
+changequote(,)dnl
+ [\\/]* | [A-Za-z]:[\\/]*)
+ re_direlt='/[^/][^/]*/\.\./'
+changequote([,])dnl
+ # Canonicalize the path of ld
+ ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
+ while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+ ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"`
+ done
+ test -z "$LD" && LD="$ac_prog"
+ ;;
+ "")
+ # If it fails, then pretend we aren't using GCC.
+ ac_prog=ld
+ ;;
+ *)
+ # If it is relative, then search for the first ld in PATH.
+ with_gnu_ld=unknown
+ ;;
+ esac
+elif test "$with_gnu_ld" = yes; then
+ AC_MSG_CHECKING([for GNU ld])
+else
+ AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(ac_cv_path_LD,
+[if test -z "$LD"; then
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+ ac_cv_path_LD="$ac_dir/$ac_prog"
+ # Check to see if the program is GNU ld. I'd rather use --version,
+ # but apparently some GNU ld's only accept -v.
+ # Break only if it was the GNU/non-GNU ld that we prefer.
+ if "$ac_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
+ test "$with_gnu_ld" != no && break
+ else
+ test "$with_gnu_ld" != yes && break
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+else
+ ac_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$ac_cv_path_LD"
+if test -n "$LD"; then
+ AC_MSG_RESULT($LD)
+else
+ AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+AC_PROG_LD_GNU
+])
+
+AC_DEFUN(AC_PROG_LD_GNU,
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], ac_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+ ac_cv_prog_gnu_ld=yes
+else
+ ac_cv_prog_gnu_ld=no
+fi])
+with_gnu_ld=$ac_cv_prog_gnu_ld
+])
+
+# AC_PROG_LD_RELOAD_FLAG - find reload flag for linker
+# -- PORTME Some linkers may need a different reload flag.
+AC_DEFUN(AC_PROG_LD_RELOAD_FLAG,
+[AC_CACHE_CHECK([for $LD option to reload object files], lt_cv_ld_reload_flag,
+[lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+])
+
+# AC_DEPLIBS_CHECK_METHOD - how to check for library dependencies
+# -- PORTME fill in with the dynamic library characteristics
+AC_DEFUN(AC_DEPLIBS_CHECK_METHOD,
+[AC_CACHE_CHECK([how to recognise dependant libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='${MAGIC}'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [regex]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case "$host_os" in
+aix4* | beos*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+bsdi4*)
+ changequote(,)dnl
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+ changequote([, ])dnl
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ ;;
+
+cygwin* | mingw*)
+ lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+ lt_cv_file_magic_cmd='${OBJDUMP} -f'
+ ;;
+
+freebsd*)
+ case "$version_type" in
+ freebsd-elf*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ esac
+ ;;
+
+gnu*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+irix5* | irix6*)
+ case "$host_os" in
+ irix5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
+ ;;
+ *)
+ case "$LD" in
+ *-32|*"-32 ") libmagic=32-bit;;
+ *-n32|*"-n32 ") libmagic=N32;;
+ *-64|*"-64 ") libmagic=64-bit;;
+ *) libmagic=never-match;;
+ esac
+ # this will be overridden with pass_all, but let us keep it just in case
+ changequote(,)dnl
+ lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"
+ changequote([, ])dnl
+ ;;
+ esac
+ lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+ case "$host_cpu" in
+ alpha* | i*86 | powerpc* | sparc* )
+ lt_cv_deplibs_check_method=pass_all ;;
+ *)
+ # glibc up to 2.1.1 does not perform some relocations on ARM
+ changequote(,)dnl
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;
+ changequote([, ])dnl
+ esac
+ lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
+ ;;
+
+osf3* | osf4* | osf5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+sco3.2v5*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+solaris*)
+ lt_cv_deplibs_check_method=pass_all
+ lt_cv_file_magic_test_file=/lib/libc.so
+ ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ case "$host_vendor" in
+ ncr)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ motorola)
+ changequote(,)dnl
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+ changequote([, ])dnl
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+ ;;
+ esac
+ ;;
+esac
+])
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+])
+
+
+# AC_PROG_NM - find the path to a BSD-compatible name lister
+AC_DEFUN(AC_PROG_NM,
+[AC_MSG_CHECKING([for BSD-compatible nm])
+AC_CACHE_VAL(ac_cv_path_NM,
+[if test -n "$NM"; then
+ # Let the user override the test.
+ ac_cv_path_NM="$NM"
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext ; then
+ # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+ # nm: unknown option "B" ignored
+ if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -B"
+ break
+ elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -p"
+ break
+ else
+ ac_cv_path_NM=${ac_cv_path_NM="$ac_dir/nm"} # keep the first match, but
+ continue # so that we can try to find one that supports BSD flags
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+ test -z "$ac_cv_path_NM" && ac_cv_path_NM=nm
+fi])
+NM="$ac_cv_path_NM"
+AC_MSG_RESULT([$NM])
+])
+
+# AC_CHECK_LIBM - check for math library
+AC_DEFUN(AC_CHECK_LIBM,
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case "$lt_target" in
+*-*-beos* | *-*-cygwin*)
+ # These system don't have libm
+ ;;
+*-ncr-sysv4.3*)
+ AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+ AC_CHECK_LIB(m, main, LIBM="$LIBM -lm")
+ ;;
+*)
+ AC_CHECK_LIB(m, main, LIBM="-lm")
+ ;;
+esac
+])
+
+# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for
+# the libltdl convenience library, adds --enable-ltdl-convenience to
+# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
+# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
+# to be `${top_builddir}/libltdl'. Make sure you start DIR with
+# '${top_builddir}/' (note the single quotes!) if your package is not
+# flat, and, if you're not using automake, define top_builddir as
+# appropriate in the Makefiles.
+AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+ case "$enable_ltdl_convenience" in
+ no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
+ "") enable_ltdl_convenience=yes
+ ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
+ esac
+ LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la
+ INCLTDL=ifelse($#,1,-I$1,['-I${top_srcdir}/libltdl'])
+])
+
+# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for
+# the libltdl installable library, and adds --enable-ltdl-install to
+# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
+# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
+# to be `${top_builddir}/libltdl'. Make sure you start DIR with
+# '${top_builddir}/' (note the single quotes!) if your package is not
+# flat, and, if you're not using automake, define top_builddir as
+# appropriate in the Makefiles.
+# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
+AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+ AC_CHECK_LIB(ltdl, main,
+ [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no],
+ [if test x"$enable_ltdl_install" = xno; then
+ AC_MSG_WARN([libltdl not installed, but installation disabled])
+ else
+ enable_ltdl_install=yes
+ fi
+ ])
+ if test x"$enable_ltdl_install" = x"yes"; then
+ ac_configure_args="$ac_configure_args --enable-ltdl-install"
+ LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la
+ INCLTDL=ifelse($#,1,-I$1,['-I${top_srcdir}/libltdl'])
+ else
+ ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
+ LIBLTDL="-lltdl"
+ INCLTDL=
+ fi
+])
+
+dnl old names
+AC_DEFUN(AM_PROG_LIBTOOL, [indir([AC_PROG_LIBTOOL])])dnl
+AC_DEFUN(AM_ENABLE_SHARED, [indir([AC_ENABLE_SHARED], $@)])dnl
+AC_DEFUN(AM_ENABLE_STATIC, [indir([AC_ENABLE_STATIC], $@)])dnl
+AC_DEFUN(AM_DISABLE_SHARED, [indir([AC_DISABLE_SHARED], $@)])dnl
+AC_DEFUN(AM_DISABLE_STATIC, [indir([AC_DISABLE_STATIC], $@)])dnl
+AC_DEFUN(AM_PROG_LD, [indir([AC_PROG_LD])])dnl
+AC_DEFUN(AM_PROG_NM, [indir([AC_PROG_NM])])dnl
+
+dnl This is just to silence aclocal about the macro not being used
+ifelse([AC_DISABLE_FAST_INSTALL])dnl
+
diff --git a/rts/gmp/ansi2knr.1 b/rts/gmp/ansi2knr.1
new file mode 100644
index 0000000000..f9ee5a631c
--- /dev/null
+++ b/rts/gmp/ansi2knr.1
@@ -0,0 +1,36 @@
+.TH ANSI2KNR 1 "19 Jan 1996"
+.SH NAME
+ansi2knr \- convert ANSI C to Kernighan & Ritchie C
+.SH SYNOPSIS
+.I ansi2knr
+[--varargs] input_file [output_file]
+.SH DESCRIPTION
+If no output_file is supplied, output goes to stdout.
+.br
+There are no error messages.
+.sp
+.I ansi2knr
+recognizes function definitions by seeing a non-keyword identifier at the left
+margin, followed by a left parenthesis, with a right parenthesis as the last
+character on the line, and with a left brace as the first token on the
+following line (ignoring possible intervening comments). It will recognize a
+multi-line header provided that no intervening line ends with a left or right
+brace or a semicolon. These algorithms ignore whitespace and comments, except
+that the function name must be the first thing on the line.
+.sp
+The following constructs will confuse it:
+.br
+ - Any other construct that starts at the left margin and follows the
+above syntax (such as a macro or function call).
+.br
+ - Some macros that tinker with the syntax of the function header.
+.sp
+The --varargs switch is obsolete, and is recognized only for
+backwards compatibility. The present version of
+.I ansi2knr
+will always attempt to convert a ... argument to va_alist and va_dcl.
+.SH AUTHOR
+L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and
+continues to maintain the current version; most of the code in the current
+version is his work. ansi2knr also includes contributions by Francois
+Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>.
diff --git a/rts/gmp/ansi2knr.c b/rts/gmp/ansi2knr.c
new file mode 100644
index 0000000000..937c731886
--- /dev/null
+++ b/rts/gmp/ansi2knr.c
@@ -0,0 +1,677 @@
+/* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved. */
+
+/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
+
+/*
+ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY. No author or distributor accepts responsibility to anyone for the
+consequences of using it or for whether it serves any particular purpose or
+works at all, unless he says so in writing. Refer to the GNU General Public
+License (the "GPL") for full details.
+
+Everyone is granted permission to copy, modify and redistribute ansi2knr,
+but only under the conditions described in the GPL. A copy of this license
+is supposed to have been given to you along with ansi2knr so you can know
+your rights and responsibilities. It should be in a file named COPYLEFT,
+or, if there is no file named COPYLEFT, a file named COPYING. Among other
+things, the copyright notice and this notice must be preserved on all
+copies.
+
+We explicitly state here what we believe is already implied by the GPL: if
+the ansi2knr program is distributed as a separate set of sources and a
+separate executable file which are aggregated on a storage medium together
+with another program, this in itself does not bring the other program under
+the GPL, nor does the mere fact that such a program or the procedures for
+constructing it invoke the ansi2knr executable bring any other part of the
+program under the GPL.
+*/
+
+/*
+ * Usage:
+ ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
+ * --filename provides the file name for the #line directive in the output,
+ * overriding input_file (if present).
+ * If no input_file is supplied, input is read from stdin.
+ * If no output_file is supplied, output goes to stdout.
+ * There are no error messages.
+ *
+ * ansi2knr recognizes function definitions by seeing a non-keyword
+ * identifier at the left margin, followed by a left parenthesis,
+ * with a right parenthesis as the last character on the line,
+ * and with a left brace as the first token on the following line
+ * (ignoring possible intervening comments), except that a line
+ * consisting of only
+ * identifier1(identifier2)
+ * will not be considered a function definition unless identifier2 is
+ * the word "void", and a line consisting of
+ * identifier1(identifier2, <<arbitrary>>)
+ * will not be considered a function definition.
+ * ansi2knr will recognize a multi-line header provided
+ * that no intervening line ends with a left or right brace or a semicolon.
+ * These algorithms ignore whitespace and comments, except that
+ * the function name must be the first thing on the line.
+ * The following constructs will confuse it:
+ * - Any other construct that starts at the left margin and
+ * follows the above syntax (such as a macro or function call).
+ * - Some macros that tinker with the syntax of function headers.
+ */
+
+/*
+ * The original and principal author of ansi2knr is L. Peter Deutsch
+ * <ghost@aladdin.com>. Other authors are noted in the change history
+ * that follows (in reverse chronological order):
+ lpd 1999-04-12 added minor fixes from Pavel Roskin
+ <pavel_roskin@geocities.com> for clean compilation with
+ gcc -W -Wall
+ lpd 1999-03-22 added hack to recognize lines consisting of
+ identifier1(identifier2, xxx) as *not* being procedures
+ lpd 1999-02-03 made indentation of preprocessor commands consistent
+ lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
+ endless loop; quoted strings within an argument list
+ confused the parser
+ lpd 1999-01-24 added a check for write errors on the output,
+ suggested by Jim Meyering <meyering@ascend.com>
+ lpd 1998-11-09 added further hack to recognize identifier(void)
+ as being a procedure
+ lpd 1998-10-23 added hack to recognize lines consisting of
+ identifier1(identifier2) as *not* being procedures
+ lpd 1997-12-08 made input_file optional; only closes input and/or
+ output file if not stdin or stdout respectively; prints
+ usage message on stderr rather than stdout; adds
+ --filename switch (changes suggested by
+ <ceder@lysator.liu.se>)
+ lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
+ compilers that don't understand void, as suggested by
+ Tom Lane
+ lpd 1996-01-15 changed to require that the first non-comment token
+ on the line following a function header be a left brace,
+ to reduce sensitivity to macros, as suggested by Tom Lane
+ <tgl@sss.pgh.pa.us>
+ lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
+ undefined preprocessor symbols as 0; changed all #ifdefs
+ for configuration symbols to #ifs
+ lpd 1995-04-05 changed copyright notice to make it clear that
+ including ansi2knr in a program does not bring the entire
+ program under the GPL
+ lpd 1994-12-18 added conditionals for systems where ctype macros
+ don't handle 8-bit characters properly, suggested by
+ Francois Pinard <pinard@iro.umontreal.ca>;
+ removed --varargs switch (this is now the default)
+ lpd 1994-10-10 removed CONFIG_BROKETS conditional
+ lpd 1994-07-16 added some conditionals to help GNU `configure',
+ suggested by Francois Pinard <pinard@iro.umontreal.ca>;
+ properly erase prototype args in function parameters,
+ contributed by Jim Avera <jima@netcom.com>;
+ correct error in writeblanks (it shouldn't erase EOLs)
+ lpd 1989-xx-xx original version
+ */
+
+/* Most of the conditionals here are to make ansi2knr work with */
+/* or without the GNU configure machinery. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+
+#if HAVE_CONFIG_H
+
+/*
+ For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
+ This will define HAVE_CONFIG_H and so, activate the following lines.
+ */
+
+# if STDC_HEADERS || HAVE_STRING_H
+# include <string.h>
+# else
+# include <strings.h>
+# endif
+
+#else /* not HAVE_CONFIG_H */
+
+/* Otherwise do it the hard way */
+
+# ifdef BSD
+# include <strings.h>
+# else
+# ifdef VMS
+ extern int strlen(), strncmp();
+# else
+# include <string.h>
+# endif
+# endif
+
+#endif /* not HAVE_CONFIG_H */
+
+#if STDC_HEADERS
+# include <stdlib.h>
+#else
+/*
+ malloc and free should be declared in stdlib.h,
+ but if you've got a K&R compiler, they probably aren't.
+ */
+# ifdef MSDOS
+# include <malloc.h>
+# else
+# ifdef VMS
+ extern char *malloc();
+ extern void free();
+# else
+ extern char *malloc();
+ extern int free();
+# endif
+# endif
+
+#endif
+
+/* Define NULL (for *very* old compilers). */
+#ifndef NULL
+# define NULL (0)
+#endif
+
+/*
+ * The ctype macros don't always handle 8-bit characters correctly.
+ * Compensate for this here.
+ */
+#ifdef isascii
+# undef HAVE_ISASCII /* just in case */
+# define HAVE_ISASCII 1
+#else
+#endif
+#if STDC_HEADERS || !HAVE_ISASCII
+# define is_ascii(c) 1
+#else
+# define is_ascii(c) isascii(c)
+#endif
+
+#define is_space(c) (is_ascii(c) && isspace(c))
+#define is_alpha(c) (is_ascii(c) && isalpha(c))
+#define is_alnum(c) (is_ascii(c) && isalnum(c))
+
+/* Scanning macros */
+#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
+#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
+
+/* Forward references */
+char *skipspace();
+char *scanstring();
+int writeblanks();
+int test1();
+int convert1();
+
+/* The main program */
+int
+main(argc, argv)
+ int argc;
+ char *argv[];
+{ FILE *in = stdin;
+ FILE *out = stdout;
+ char *filename = 0;
+ char *program_name = argv[0];
+ char *output_name = 0;
+#define bufsize 5000 /* arbitrary size */
+ char *buf;
+ char *line;
+ char *more;
+ char *usage =
+ "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
+ /*
+ * In previous versions, ansi2knr recognized a --varargs switch.
+ * If this switch was supplied, ansi2knr would attempt to convert
+ * a ... argument to va_alist and va_dcl; if this switch was not
+ * supplied, ansi2knr would simply drop any such arguments.
+ * Now, ansi2knr always does this conversion, and we only
+ * check for this switch for backward compatibility.
+ */
+ int convert_varargs = 1;
+ int output_error;
+
+ while ( argc > 1 && argv[1][0] == '-' ) {
+ if ( !strcmp(argv[1], "--varargs") ) {
+ convert_varargs = 1;
+ argc--;
+ argv++;
+ continue;
+ }
+ if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
+ filename = argv[2];
+ argc -= 2;
+ argv += 2;
+ continue;
+ }
+ fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
+ argv[1]);
+ fprintf(stderr, usage);
+ exit(1);
+ }
+ switch ( argc )
+ {
+ default:
+ fprintf(stderr, usage);
+ exit(0);
+ case 3:
+ output_name = argv[2];
+ out = fopen(output_name, "w");
+ if ( out == NULL ) {
+ fprintf(stderr, "%s: Cannot open output file %s\n",
+ program_name, output_name);
+ exit(1);
+ }
+ /* falls through */
+ case 2:
+ in = fopen(argv[1], "r");
+ if ( in == NULL ) {
+ fprintf(stderr, "%s: Cannot open input file %s\n",
+ program_name, argv[1]);
+ exit(1);
+ }
+ if ( filename == 0 )
+ filename = argv[1];
+ /* falls through */
+ case 1:
+ break;
+ }
+ if ( filename )
+ fprintf(out, "#line 1 \"%s\"\n", filename);
+ buf = malloc(bufsize);
+ if ( buf == NULL )
+ {
+ fprintf(stderr, "Unable to allocate read buffer!\n");
+ exit(1);
+ }
+ line = buf;
+ while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
+ {
+test: line += strlen(line);
+ switch ( test1(buf) )
+ {
+ case 2: /* a function header */
+ convert1(buf, out, 1, convert_varargs);
+ break;
+ case 1: /* a function */
+ /* Check for a { at the start of the next line. */
+ more = ++line;
+f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
+ goto wl;
+ if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
+ goto wl;
+ switch ( *skipspace(more, 1) )
+ {
+ case '{':
+ /* Definitely a function header. */
+ convert1(buf, out, 0, convert_varargs);
+ fputs(more, out);
+ break;
+ case 0:
+ /* The next line was blank or a comment: */
+ /* keep scanning for a non-comment. */
+ line += strlen(line);
+ goto f;
+ default:
+ /* buf isn't a function header, but */
+ /* more might be. */
+ fputs(buf, out);
+ strcpy(buf, more);
+ line = buf;
+ goto test;
+ }
+ break;
+ case -1: /* maybe the start of a function */
+ if ( line != buf + (bufsize - 1) ) /* overflow check */
+ continue;
+ /* falls through */
+ default: /* not a function */
+wl: fputs(buf, out);
+ break;
+ }
+ line = buf;
+ }
+ if ( line != buf )
+ fputs(buf, out);
+ free(buf);
+ if ( output_name ) {
+ output_error = ferror(out);
+ output_error |= fclose(out);
+ } else { /* out == stdout */
+ fflush(out);
+ output_error = ferror(out);
+ }
+ if ( output_error ) {
+ fprintf(stderr, "%s: error writing to %s\n", program_name,
+ (output_name ? output_name : "stdout"));
+ exit(1);
+ }
+ if ( in != stdin )
+ fclose(in);
+ return 0;
+}
+
+/* Skip over whitespace and comments, in either direction. */
+char *
+skipspace(p, dir)
+ register char *p;
+ register int dir; /* 1 for forward, -1 for backward */
+{ for ( ; ; )
+ { while ( is_space(*p) )
+ p += dir;
+ if ( !(*p == '/' && p[dir] == '*') )
+ break;
+ p += dir; p += dir;
+ while ( !(*p == '*' && p[dir] == '/') )
+ { if ( *p == 0 )
+ return p; /* multi-line comment?? */
+ p += dir;
+ }
+ p += dir; p += dir;
+ }
+ return p;
+}
+
+/* Scan over a quoted string, in either direction. */
+char *
+scanstring(p, dir)
+ register char *p;
+ register int dir;
+{
+ for (p += dir; ; p += dir)
+ if (*p == '"' && p[-dir] != '\\')
+ return p + dir;
+}
+
+/*
+ * Write blanks over part of a string.
+ * Don't overwrite end-of-line characters.
+ */
+int
+writeblanks(start, end)
+ char *start;
+ char *end;
+{ char *p;
+ for ( p = start; p < end; p++ )
+ if ( *p != '\r' && *p != '\n' )
+ *p = ' ';
+ return 0;
+}
+
+/*
+ * Test whether the string in buf is a function definition.
+ * The string may contain and/or end with a newline.
+ * Return as follows:
+ * 0 - definitely not a function definition;
+ * 1 - definitely a function definition;
+ * 2 - definitely a function prototype (NOT USED);
+ * -1 - may be the beginning of a function definition,
+ * append another line and look again.
+ * The reason we don't attempt to convert function prototypes is that
+ * Ghostscript's declaration-generating macros look too much like
+ * prototypes, and confuse the algorithms.
+ */
+int
+test1(buf)
+ char *buf;
+{ register char *p = buf;
+ char *bend;
+ char *endfn;
+ int contin;
+
+ if ( !isidfirstchar(*p) )
+ return 0; /* no name at left margin */
+ bend = skipspace(buf + strlen(buf) - 1, -1);
+ switch ( *bend )
+ {
+ case ';': contin = 0 /*2*/; break;
+ case ')': contin = 1; break;
+ case '{': return 0; /* not a function */
+ case '}': return 0; /* not a function */
+ default: contin = -1;
+ }
+ while ( isidchar(*p) )
+ p++;
+ endfn = p;
+ p = skipspace(p, 1);
+ if ( *p++ != '(' )
+ return 0; /* not a function */
+ p = skipspace(p, 1);
+ if ( *p == ')' )
+ return 0; /* no parameters */
+ /* Check that the apparent function name isn't a keyword. */
+ /* We only need to check for keywords that could be followed */
+ /* by a left parenthesis (which, unfortunately, is most of them). */
+ { static char *words[] =
+ { "asm", "auto", "case", "char", "const", "double",
+ "extern", "float", "for", "if", "int", "long",
+ "register", "return", "short", "signed", "sizeof",
+ "static", "switch", "typedef", "unsigned",
+ "void", "volatile", "while", 0
+ };
+ char **key = words;
+ char *kp;
+ unsigned len = endfn - buf;
+
+ while ( (kp = *key) != 0 )
+ { if ( strlen(kp) == len && !strncmp(kp, buf, len) )
+ return 0; /* name is a keyword */
+ key++;
+ }
+ }
+ {
+ char *id = p;
+ int len;
+ /*
+ * Check for identifier1(identifier2) and not
+ * identifier1(void), or identifier1(identifier2, xxxx).
+ */
+
+ while ( isidchar(*p) )
+ p++;
+ len = p - id;
+ p = skipspace(p, 1);
+ if (*p == ',' ||
+ (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
+ )
+ return 0; /* not a function */
+ }
+ /*
+ * If the last significant character was a ), we need to count
+ * parentheses, because it might be part of a formal parameter
+ * that is a procedure.
+ */
+ if (contin > 0) {
+ int level = 0;
+
+ for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
+ level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
+ if (level > 0)
+ contin = -1;
+ }
+ return contin;
+}
+
+/* Convert a recognized function definition or header to K&R syntax. */
+int
+convert1(buf, out, header, convert_varargs)
+ char *buf;
+ FILE *out;
+ int header; /* Boolean */
+ int convert_varargs; /* Boolean */
+{ char *endfn;
+ register char *p;
+ /*
+ * The breaks table contains pointers to the beginning and end
+ * of each argument.
+ */
+ char **breaks;
+ unsigned num_breaks = 2; /* for testing */
+ char **btop;
+ char **bp;
+ char **ap;
+ char *vararg = 0;
+
+ /* Pre-ANSI implementations don't agree on whether strchr */
+ /* is called strchr or index, so we open-code it here. */
+ for ( endfn = buf; *(endfn++) != '('; )
+ ;
+top: p = endfn;
+ breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
+ if ( breaks == NULL )
+ { /* Couldn't allocate break table, give up */
+ fprintf(stderr, "Unable to allocate break table!\n");
+ fputs(buf, out);
+ return -1;
+ }
+ btop = breaks + num_breaks * 2 - 2;
+ bp = breaks;
+ /* Parse the argument list */
+ do
+ { int level = 0;
+ char *lp = NULL;
+ char *rp = NULL;
+ char *end = NULL;
+
+ if ( bp >= btop )
+ { /* Filled up break table. */
+ /* Allocate a bigger one and start over. */
+ free((char *)breaks);
+ num_breaks <<= 1;
+ goto top;
+ }
+ *bp++ = p;
+ /* Find the end of the argument */
+ for ( ; end == NULL; p++ )
+ { switch(*p)
+ {
+ case ',':
+ if ( !level ) end = p;
+ break;
+ case '(':
+ if ( !level ) lp = p;
+ level++;
+ break;
+ case ')':
+ if ( --level < 0 ) end = p;
+ else rp = p;
+ break;
+ case '/':
+ if (p[1] == '*')
+ p = skipspace(p, 1) - 1;
+ break;
+ case '"':
+ p = scanstring(p, 1) - 1;
+ break;
+ default:
+ ;
+ }
+ }
+ /* Erase any embedded prototype parameters. */
+ if ( lp && rp )
+ writeblanks(lp + 1, rp);
+ p--; /* back up over terminator */
+ /* Find the name being declared. */
+ /* This is complicated because of procedure and */
+ /* array modifiers. */
+ for ( ; ; )
+ { p = skipspace(p - 1, -1);
+ switch ( *p )
+ {
+ case ']': /* skip array dimension(s) */
+ case ')': /* skip procedure args OR name */
+ { int level = 1;
+ while ( level )
+ switch ( *--p )
+ {
+ case ']': case ')':
+ level++;
+ break;
+ case '[': case '(':
+ level--;
+ break;
+ case '/':
+ if (p > buf && p[-1] == '*')
+ p = skipspace(p, -1) + 1;
+ break;
+ case '"':
+ p = scanstring(p, -1) + 1;
+ break;
+ default: ;
+ }
+ }
+ if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
+ { /* We found the name being declared */
+ while ( !isidfirstchar(*p) )
+ p = skipspace(p, 1) + 1;
+ goto found;
+ }
+ break;
+ default:
+ goto found;
+ }
+ }
+found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
+ { if ( convert_varargs )
+ { *bp++ = "va_alist";
+ vararg = p-2;
+ }
+ else
+ { p++;
+ if ( bp == breaks + 1 ) /* sole argument */
+ writeblanks(breaks[0], p);
+ else
+ writeblanks(bp[-1] - 1, p);
+ bp--;
+ }
+ }
+ else
+ { while ( isidchar(*p) ) p--;
+ *bp++ = p+1;
+ }
+ p = end;
+ }
+ while ( *p++ == ',' );
+ *bp = p;
+ /* Make a special check for 'void' arglist */
+ if ( bp == breaks+2 )
+ { p = skipspace(breaks[0], 1);
+ if ( !strncmp(p, "void", 4) )
+ { p = skipspace(p+4, 1);
+ if ( p == breaks[2] - 1 )
+ { bp = breaks; /* yup, pretend arglist is empty */
+ writeblanks(breaks[0], p + 1);
+ }
+ }
+ }
+ /* Put out the function name and left parenthesis. */
+ p = buf;
+ while ( p != endfn ) putc(*p, out), p++;
+ /* Put out the declaration. */
+ if ( header )
+ { fputs(");", out);
+ for ( p = breaks[0]; *p; p++ )
+ if ( *p == '\r' || *p == '\n' )
+ putc(*p, out);
+ }
+ else
+ { for ( ap = breaks+1; ap < bp; ap += 2 )
+ { p = *ap;
+ while ( isidchar(*p) )
+ putc(*p, out), p++;
+ if ( ap < bp - 1 )
+ fputs(", ", out);
+ }
+ fputs(") ", out);
+ /* Put out the argument declarations */
+ for ( ap = breaks+2; ap <= bp; ap += 2 )
+ (*ap)[-1] = ';';
+ if ( vararg != 0 )
+ { *vararg = 0;
+ fputs(breaks[0], out); /* any prior args */
+ fputs("va_dcl", out); /* the final arg */
+ fputs(bp[0], out);
+ }
+ else
+ fputs(breaks[0], out);
+ }
+ free((char *)breaks);
+ return 0;
+}
diff --git a/rts/gmp/assert.c b/rts/gmp/assert.c
new file mode 100644
index 0000000000..65eccfa30b
--- /dev/null
+++ b/rts/gmp/assert.c
@@ -0,0 +1,52 @@
+/* GMP assertion failure handler. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if __STDC__
+__gmp_assert_fail (const char *filename, int linenum,
+ const char *expr)
+#else
+__gmp_assert_fail (filename, linenum, expr)
+char *filename;
+int linenum;
+char *expr;
+#endif
+{
+ if (filename != NULL && filename[0] != '\0')
+ {
+ fprintf (stderr, "%s:", filename);
+ if (linenum != -1)
+ fprintf (stderr, "%d: ", linenum);
+ }
+
+ fprintf (stderr, "GNU MP assertion failed: %s\n", expr);
+ abort();
+
+ /*NOTREACHED*/
+ return 0;
+}
diff --git a/rts/gmp/compat.c b/rts/gmp/compat.c
new file mode 100644
index 0000000000..ab7529f52f
--- /dev/null
+++ b/rts/gmp/compat.c
@@ -0,0 +1,46 @@
+/* Old function entrypoints retained for binary compatibility. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* mpn_divexact_by3 was a function in gmp 3.0, but as of gmp 3.1 it's a
+ macro calling mpn_divexact_by3c. */
+int
+__MPN (divexact_by3) (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+ mpn_divexact_by3 (dst, src, size);
+}
+
+
+/* mpn_divmod_1 was a function in gmp 3.0 and earlier, but marked obsolete
+ in gmp 2 and 3. As of gmp 3.1 it's a macro calling mpn_divrem_1. */
+int
+__MPN (divmod_1) (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+ mpn_divmod_1 (dst, src, size, divisor);
+}
+
+
diff --git a/rts/gmp/config.guess b/rts/gmp/config.guess
new file mode 100644
index 0000000000..08018f497d
--- /dev/null
+++ b/rts/gmp/config.guess
@@ -0,0 +1,1373 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+# Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Written by Per Bothner <bothner@cygnus.com>.
+# Please send patches to <config-patches@gnu.org>.
+#
+# This script attempts to guess a canonical system name similar to
+# config.sub. If it succeeds, it prints the system name on stdout, and
+# exits with 0. Otherwise, it exits with 1.
+#
+# The plan is that this can be called by configure scripts if you
+# don't specify an explicit system type (host/target name).
+#
+# Only a few systems have been added to this list; please add others
+# (but try to keep the structure clean).
+#
+
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 8/24/94.)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+ PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+dummy=dummy-$$
+trap 'rm -f $dummy.c $dummy.o $dummy ${dummy}1.s ${dummy}2.c ; exit 1' 1 2 15
+
+# Use $HOST_CC if defined. $CC may point to a cross-compiler
+if test x"$CC_FOR_BUILD" = x; then
+ if test x"$HOST_CC" != x; then
+ CC_FOR_BUILD="$HOST_CC"
+ else
+ if test x"$CC" != x; then
+ CC_FOR_BUILD="$CC"
+ else
+ echo 'dummy(){}' >$dummy.c
+ for c in cc c89 gcc; do
+ ($c $dummy.c -c) >/dev/null 2>&1
+ if test $? = 0; then
+ CC_FOR_BUILD="$c"; break
+ fi
+ done
+ rm -f $dummy.c $dummy.o
+ if test x"$CC_FOR_BUILD" = x; then
+ CC_FOR_BUILD=no_compiler_found
+ fi
+ fi
+ fi
+fi
+
+
+# First make a best effort at recognizing x86 CPU type and leave it in X86CPU.
+# If we fail, set X86CPU to UNAME_MACHINE
+#
+# DJGPP v2 (or 2.03 at least) always gives "pc" for uname -m, and the
+# OEM for uname -s. Eg. pc:MS-DOS:6:2 on MS-DOS 6.21. The list of
+# possible OEMs is in src/libc/dos/dos/getdos_v.c of djlsr203.zip, but
+# just pc:*:*:* seems ok.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+ i?86:*:*:* | i86pc:*:*:* | pc:*:*:*)
+ case "${UNAME_MACHINE}" in
+ i86pc | pc) UNAME_MACHINE=i386 ;;
+ esac
+ cat <<EOF >${dummy}1.s
+ .globl cpuid
+ .globl _cpuid
+cpuid:
+_cpuid:
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%eax
+ .byte 0x0f
+ .byte 0xa2
+ movl 12(%esp),%esi
+ movl %ebx,(%esi)
+ movl %edx,4(%esi)
+ movl %ecx,8(%esi)
+ popl %ebx
+ popl %esi
+ ret
+EOF
+ cat <<EOF >${dummy}2.c
+main ()
+{
+ char vendor_string[13];
+ char dummy_string[12];
+ long fms;
+ int family, model;
+ char *modelstr;
+
+ cpuid (vendor_string, 0);
+ vendor_string[12] = 0;
+
+ fms = cpuid (dummy_string, 1);
+
+ family = (fms >> 8) & 15;
+ model = (fms >> 4) & 15;
+
+ modelstr = "i486";
+ if (strcmp (vendor_string, "GenuineIntel") == 0)
+ {
+ switch (family)
+ {
+ case 5:
+ if (model <= 2)
+ modelstr = "pentium";
+ else if (model >= 4)
+ modelstr = "pentiummmx";
+ break;
+ case 6:
+ if (model == 1)
+ modelstr = "pentiumpro";
+ else if (model <= 6)
+ modelstr = "pentium2";
+ else
+ modelstr = "pentium3";
+ break;
+ }
+ }
+ else if (strcmp (vendor_string, "AuthenticAMD") == 0)
+ {
+ switch (family)
+ {
+ case 5:
+ if (model <= 3)
+ modelstr = "k5";
+ else if (model <= 7)
+ modelstr = "k6";
+ else if (model <= 8)
+ modelstr = "k62";
+ else if (model <= 9)
+ modelstr = "k63";
+ break;
+ case 6:
+ modelstr = "athlon";
+ break;
+ }
+ }
+ else if (strcmp (vendor_string, "CyrixInstead") == 0)
+ {
+ /* Should recognize Cyrix' processors too. */
+ }
+
+ printf ("%s\n", modelstr);
+ return 0;
+}
+EOF
+ $CC_FOR_BUILD ${dummy}1.s ${dummy}2.c -o $dummy >/dev/null 2>&1
+ if test "$?" = 0 ; then
+ X86CPU=`./$dummy`
+ fi
+
+
+ # Default to believing uname -m if the program fails to compile or
+ # run. Will fail to run on 386 since cpuid was only added on 486.
+ if test -z "$X86CPU"
+ then
+ X86CPU="$UNAME_MACHINE"
+ fi
+ rm -f ${dummy}1.s ${dummy}2.c $dummy
+ ;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+ alpha:OSF1:*:*)
+ if test $UNAME_RELEASE = "V4.0"; then
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+ fi
+ # A Vn.n version is a released version.
+ # A Tn.n version is a released field test version.
+ # A Xn.n version is an unreleased experimental baselevel.
+ # 1.2 uses "1.2" for uname -r.
+ cat <<EOF >$dummy.s
+ .data
+\$Lformat:
+ .byte 37,100,45,37,120,10,0 # "%d-%x\n"
+
+ .text
+ .globl main
+ .align 4
+ .ent main
+main:
+ .frame \$30,16,\$26,0
+ ldgp \$29,0(\$27)
+ .prologue 1
+ .long 0x47e03d80 # implver \$0
+ lda \$2,-1
+ .long 0x47e20c21 # amask \$2,\$1
+ lda \$16,\$Lformat
+ mov \$0,\$17
+ not \$1,\$18
+ jsr \$26,printf
+ ldgp \$29,0(\$26)
+ mov 0,\$16
+ jsr \$26,exit
+ .end main
+EOF
+ $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ case `./$dummy` in
+ 0-0)
+ UNAME_MACHINE="alpha"
+ ;;
+ 1-0)
+ UNAME_MACHINE="alphaev5"
+ ;;
+ 1-1)
+ UNAME_MACHINE="alphaev56"
+ ;;
+ 1-101)
+ UNAME_MACHINE="alphapca56"
+ ;;
+ 2-303)
+ UNAME_MACHINE="alphaev6"
+ ;;
+ 2-307)
+ UNAME_MACHINE="alphaev67"
+ ;;
+ esac
+ fi
+ rm -f $dummy.s $dummy
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ exit 0 ;;
+ alpha:NetBSD:*:* | alpha:FreeBSD:*:*)
+ cat <<EOF >$dummy.s
+ .globl main
+ .ent main
+main:
+ .frame \$30,0,\$26,0
+ .prologue 0
+ .long 0x47e03d80 # implver $0
+ lda \$2,259
+ .long 0x47e20c21 # amask $2,$1
+ srl \$1,8,\$2
+ sll \$2,2,\$2
+ sll \$0,3,\$0
+ addl \$1,\$0,\$0
+ addl \$2,\$0,\$0
+ ret \$31,(\$26),1
+ .end main
+EOF
+ $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ ./$dummy
+ case "$?" in
+ 7)
+ UNAME_MACHINE="alpha"
+ ;;
+ 15)
+ UNAME_MACHINE="alphaev5"
+ ;;
+ 14)
+ UNAME_MACHINE="alphaev56"
+ ;;
+ 10)
+ UNAME_MACHINE="alphapca56"
+ ;;
+ 16)
+ UNAME_MACHINE="alphaev6"
+ ;;
+ esac
+ fi
+ rm -f $dummy.s $dummy
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM}${UNAME_RELEASE} | sed -e 's/^[VTX]//' -e 's/[-(].*//' | tr [[A-Z]] [[a-z]]`
+ exit 0 ;;
+ Alpha\ *:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # Should we change UNAME_MACHINE based on the output of uname instead
+ # of the specific Alpha model?
+ echo alpha-pc-interix
+ exit 0 ;;
+ 21064:Windows_NT:50:3)
+ echo alpha-dec-winnt3.5
+ exit 0 ;;
+ Amiga*:UNIX_System_V:4.0:*)
+ echo m68k-cbm-sysv4
+ exit 0;;
+ amiga:NetBSD:*:*)
+ echo m68k-cbm-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ amiga:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ *:[Aa]miga[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-unknown-amigaos
+ exit 0 ;;
+ arc64:OpenBSD:*:*)
+ echo mips64el-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ arc:OpenBSD:*:*)
+ echo mipsel-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ hkmips:OpenBSD:*:*)
+ echo mips-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ pmax:OpenBSD:*:*)
+ echo mipsel-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ sgi:OpenBSD:*:*)
+ echo mips-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ wgrisc:OpenBSD:*:*)
+ echo mipsel-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ *:OS/390:*:*)
+ echo i370-ibm-openedition
+ exit 0 ;;
+ arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+ echo arm-acorn-riscix${UNAME_RELEASE}
+ exit 0;;
+ arm32:NetBSD:*:*)
+ echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ SR2?01:HI-UX/MPP:*:*)
+ echo hppa1.1-hitachi-hiuxmpp
+ exit 0;;
+ Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+ # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then
+ echo pyramid-pyramid-sysv3
+ else
+ echo pyramid-pyramid-bsd
+ fi
+ exit 0 ;;
+ NILE*:*:*:dcosx)
+ echo pyramid-pyramid-svr4
+ exit 0 ;;
+ sun4H:SunOS:5.*:*)
+ echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4[md]:SunOS:5.*:*)
+ echo sparcv8-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4u:SunOS:5.*:*)
+ echo sparcv9-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+ echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ i386:SunOS:5.*:*)
+ echo ${X86CPU}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4[md]:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+ UNAME_RELEASE=`uname -v`
+ ;;
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparcv8-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ exit 0 ;;
+ sun4*:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+ UNAME_RELEASE=`uname -v`
+ ;;
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ exit 0 ;;
+ sun3*:SunOS:*:*)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ exit 0 ;;
+ sun*:*:4.2BSD:*)
+ UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+ test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+ case "`/bin/arch`" in
+ sun3)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ ;;
+ sun4)
+ echo sparc-sun-sunos${UNAME_RELEASE}
+ ;;
+ esac
+ exit 0 ;;
+ aushp:SunOS:*:*)
+ echo sparc-auspex-sunos${UNAME_RELEASE}
+ exit 0 ;;
+ atari*:NetBSD:*:*)
+ echo m68k-atari-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ atari*:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ # The situation for MiNT is a little confusing. The machine name
+ # can be virtually everything (everything which is not
+ # "atarist" or "atariste" at least should have a processor
+ # > m68000). The system name ranges from "MiNT" over "FreeMiNT"
+ # to the lowercase version "mint" (or "freemint"). Finally
+ # the system name "TOS" denotes a system which is actually not
+ # MiNT. But MiNT is downward compatible to TOS, so this should
+ # be no problem.
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit 0 ;;
+ atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit 0 ;;
+ *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit 0 ;;
+ milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+ echo m68k-milan-mint${UNAME_RELEASE}
+ exit 0 ;;
+ hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+ echo m68k-hades-mint${UNAME_RELEASE}
+ exit 0 ;;
+ *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+ echo m68k-unknown-mint${UNAME_RELEASE}
+ exit 0 ;;
+ sun3*:NetBSD:*:*)
+ echo m68k-sun-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ sun3*:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mac68k:NetBSD:*:*)
+ echo m68k-apple-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mac68k:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ macppc:NetBSD:*:*)
+ echo powerpc-apple-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mvme68k:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mvme88k:OpenBSD:*:*)
+ echo m88k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ powerpc:machten:*:*)
+ echo powerpc-apple-machten${UNAME_RELEASE}
+ exit 0 ;;
+ RISC*:Mach:*:*)
+ echo mips-dec-mach_bsd4.3
+ exit 0 ;;
+ RISC*:ULTRIX:*:*)
+ echo mips-dec-ultrix${UNAME_RELEASE}
+ exit 0 ;;
+ VAX*:ULTRIX*:*:*)
+ echo vax-dec-ultrix${UNAME_RELEASE}
+ exit 0 ;;
+ 2020:CLIX:*:* | 2430:CLIX:*:*)
+ echo clipper-intergraph-clix${UNAME_RELEASE}
+ exit 0 ;;
+ mips:*:*:UMIPS | mips:*:*:RISCos)
+ sed 's/^ //' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+ #if defined (host_mips) && defined (MIPSEB)
+ #if defined (SYSTYPE_SYSV)
+ printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_SVR4)
+ printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+ printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+ #endif
+ #endif
+ exit (-1);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy \
+ && ./$dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
+ && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ echo mips-mips-riscos${UNAME_RELEASE}
+ exit 0 ;;
+ Night_Hawk:Power_UNIX:*:*)
+ echo powerpc-harris-powerunix
+ exit 0 ;;
+ m88k:CX/UX:7*:*)
+ echo m88k-harris-cxux7
+ exit 0 ;;
+ m88k:*:4*:R4*)
+ echo m88k-motorola-sysv4
+ exit 0 ;;
+ m88k:*:3*:R3*)
+ echo m88k-motorola-sysv3
+ exit 0 ;;
+ AViiON:dgux:*:*)
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+ then
+ if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+ [ ${TARGET_BINARY_INTERFACE}x = x ]
+ then
+ echo m88k-dg-dgux${UNAME_RELEASE}
+ else
+ echo m88k-dg-dguxbcs${UNAME_RELEASE}
+ fi
+ else
+ echo i586-dg-dgux${UNAME_RELEASE}
+ fi
+ exit 0 ;;
+ M88*:DolphinOS:*:*) # DolphinOS (SVR3)
+ echo m88k-dolphin-sysv3
+ exit 0 ;;
+ M88*:*:R3*:*)
+ # Delta 88k system running SVR3
+ echo m88k-motorola-sysv3
+ exit 0 ;;
+ XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+ echo m88k-tektronix-sysv3
+ exit 0 ;;
+ Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+ echo m68k-tektronix-bsd
+ exit 0 ;;
+ *:IRIX*:*:*)
+ echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+ exit 0 ;;
+ ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
+ exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ i?86:AIX:*:*)
+ echo i386-ibm-aix
+ exit 0 ;;
+ *:AIX:2:3)
+ if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+ sed 's/^ //' << EOF >$dummy.c
+ #include <sys/systemcfg.h>
+
+ main()
+ {
+ if (!__power_pc())
+ exit(1);
+ puts("powerpc-ibm-aix3.2.5");
+ exit(0);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ echo rs6000-ibm-aix3.2.5
+ elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+ echo rs6000-ibm-aix3.2.4
+ else
+ echo rs6000-ibm-aix3.2
+ fi
+ exit 0 ;;
+ *:AIX:*:4)
+ sed 's/^ //' << EOF >$dummy.c
+ #include <stdio.h>
+ #include <sys/systemcfg.h>
+ main ()
+ {
+ if (_system_configuration.architecture == POWER_RS
+ || _system_configuration.implementation == POWER_601)
+ puts ("power");
+ else
+ {
+ if (_system_configuration.width == 64)
+ puts ("powerpc64");
+ else
+ puts ("powerpc");
+ }
+ exit (0);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy
+ IBM_ARCH=`./$dummy`
+ rm -f $dummy.c $dummy
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+ else
+ IBM_REV=4.${UNAME_RELEASE}
+ fi
+ echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+ exit 0 ;;
+ *:AIX:*:*)
+ echo rs6000-ibm-aix
+ exit 0 ;;
+ ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+ echo romp-ibm-bsd4.4
+ exit 0 ;;
+ ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
+ echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
+ exit 0 ;; # report: romp-ibm BSD 4.3
+ *:BOSX:*:*)
+ echo rs6000-bull-bosx
+ exit 0 ;;
+ DPX/2?00:B.O.S.:*:*)
+ echo m68k-bull-sysv3
+ exit 0 ;;
+ 9000/[34]??:4.3bsd:1.*:*)
+ echo m68k-hp-bsd
+ exit 0 ;;
+ hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+ echo m68k-hp-bsd4.4
+ exit 0 ;;
+ 9000/[34678]??:HP-UX:*:*)
+ case "${UNAME_MACHINE}" in
+ 9000/31? ) HP_ARCH=m68000 ;;
+ 9000/[34]?? ) HP_ARCH=m68k ;;
+ 9000/[678][0-9][0-9])
+ sed 's/^ //' << EOF >$dummy.c
+
+ #define _HPUX_SOURCE
+ #include <stdlib.h>
+ #include <unistd.h>
+
+ int main ()
+ {
+ #if defined(_SC_KERNEL_BITS)
+ long bits = sysconf(_SC_KERNEL_BITS);
+ #endif
+ long cpu = sysconf (_SC_CPU_VERSION);
+
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+ case CPU_PA_RISC2_0:
+ #if defined(_SC_KERNEL_BITS)
+ switch (bits)
+ {
+ case 64: puts ("hppa2.0w"); break;
+ case 32: puts ("hppa2.0n"); break;
+ default: puts ("hppa2.0"); break;
+ } break;
+ #else /* !defined(_SC_KERNEL_BITS) */
+ puts ("hppa2.0"); break;
+ #endif
+ default: puts ("hppa1.0"); break;
+ }
+ exit (0);
+ }
+EOF
+ (CCOPTS= $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null ) && HP_ARCH=`./$dummy`
+ rm -f $dummy.c $dummy
+ esac
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+ exit 0 ;;
+ 3050*:HI-UX:*:*)
+ sed 's/^ //' << EOF >$dummy.c
+ #include <unistd.h>
+ int
+ main ()
+ {
+ long cpu = sysconf (_SC_CPU_VERSION);
+ /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+ true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
+ results, however. */
+ if (CPU_IS_PA_RISC (cpu))
+ {
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+ default: puts ("hppa-hitachi-hiuxwe2"); break;
+ }
+ }
+ else if (CPU_IS_HP_MC68K (cpu))
+ puts ("m68k-hitachi-hiuxwe2");
+ else puts ("unknown-hitachi-hiuxwe2");
+ exit (0);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ echo unknown-hitachi-hiuxwe2
+ exit 0 ;;
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+ echo hppa1.1-hp-bsd
+ exit 0 ;;
+ 9000/8??:4.3bsd:*:*)
+ echo hppa1.0-hp-bsd
+ exit 0 ;;
+ *9??*:MPE/iX:*:*)
+ echo hppa1.0-hp-mpeix
+ exit 0 ;;
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+ echo hppa1.1-hp-osf
+ exit 0 ;;
+ hp8??:OSF1:*:*)
+ echo hppa1.0-hp-osf
+ exit 0 ;;
+ i?86:OSF1:*:*)
+ if [ -x /usr/sbin/sysversion ] ; then
+ echo ${UNAME_MACHINE}-unknown-osf1mk
+ else
+ echo ${UNAME_MACHINE}-unknown-osf1
+ fi
+ exit 0 ;;
+ parisc*:Lites*:*:*)
+ echo hppa1.1-hp-lites
+ exit 0 ;;
+ hppa*:OpenBSD:*:*)
+ echo hppa-unknown-openbsd
+ exit 0 ;;
+ C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+ echo c1-convex-bsd
+ exit 0 ;;
+ C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit 0 ;;
+ C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+ echo c34-convex-bsd
+ exit 0 ;;
+ C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+ echo c38-convex-bsd
+ exit 0 ;;
+ C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+ echo c4-convex-bsd
+ exit 0 ;;
+ CRAY*X-MP:*:*:*)
+ echo xmp-cray-unicos
+ exit 0 ;;
+ CRAY*Y-MP:*:*:*)
+ echo ymp-cray-unicos${UNAME_RELEASE}
+ exit 0 ;;
+ CRAY*[A-Z]90:*:*:*)
+ echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
+ exit 0 ;;
+ CRAY*TS:*:*:*)
+ echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit 0 ;;
+ CRAY*T3D:*:*:*)
+ echo alpha-cray-unicos
+ exit 0 ;;
+ CRAY*T3E:*:*:*)
+ echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit 0 ;;
+ CRAY*SV1:*:*:*)
+ echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit 0 ;;
+ CRAY-2:*:*:*)
+ echo cray2-cray-unicos
+ exit 0 ;;
+ F300:UNIX_System_V:*:*)
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit 0 ;;
+ F301:UNIX_System_V:*:*)
+ echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'`
+ exit 0 ;;
+ hp3[0-9][05]:NetBSD:*:*)
+ echo m68k-hp-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ hp300:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ i?86:BSD/386:*:* | i?86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+ echo ${X86CPU}-pc-bsdi${UNAME_RELEASE}
+ exit 0 ;;
+ sparc*:BSD/OS:*:*)
+ echo sparc-unknown-bsdi${UNAME_RELEASE}
+ exit 0 ;;
+ *:BSD/OS:*:*)
+ echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+ exit 0 ;;
+ i386:FreeBSD:*:*)
+ echo ${X86CPU}-pc-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ exit 0 ;;
+ *:FreeBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ exit 0 ;;
+ i386:NetBSD:*:*)
+ echo ${X86CPU}-pc-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ *:NetBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ i386:OpenBSD:*:*)
+ echo ${X86CPU}-pc-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ *:OpenBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ i*:CYGWIN*:*)
+ echo ${X86CPU}-pc-cygwin
+ exit 0 ;;
+ i*:MINGW*:*)
+ echo ${UNAME_MACHINE}-pc-mingw32
+ exit 0 ;;
+ i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+ # UNAME_MACHINE based on the output of uname instead of i386?
+ echo i386-pc-interix
+ exit 0 ;;
+ i*:UWIN*:*)
+ echo ${UNAME_MACHINE}-pc-uwin
+ exit 0 ;;
+ p*:CYGWIN*:*)
+ echo powerpcle-unknown-cygwin
+ exit 0 ;;
+ prep*:SunOS:5.*:*)
+ echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ *:GNU:*:*)
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+ exit 0 ;;
+ *:Linux:*:*)
+
+ # The BFD linker knows what the default object file format is, so
+ # first see if it will tell us. cd to the root directory to prevent
+ # problems with other programs or directories called `ld' in the path.
+ ld_help_string=`cd /; ld --help 2>&1`
+ ld_supported_emulations=`echo $ld_help_string \
+ | sed -ne '/supported emulations:/!d
+ s/[ ][ ]*/ /g
+ s/.*supported emulations: *//
+ s/ .*//
+ p'`
+ case "$ld_supported_emulations" in
+ *ia64)
+ echo "${UNAME_MACHINE}-unknown-linux"
+ exit 0
+ ;;
+ i?86linux)
+ echo "${X86CPU}-pc-linux-gnuaout"
+ exit 0
+ ;;
+ i?86coff)
+ echo "${X86CPU}-pc-linux-gnucoff"
+ exit 0
+ ;;
+ sparclinux)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuaout"
+ exit 0
+ ;;
+ armlinux)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuaout"
+ exit 0
+ ;;
+ elf32arm*)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuoldld"
+ exit 0
+ ;;
+ armelf_linux*)
+ echo "${UNAME_MACHINE}-unknown-linux-gnu"
+ exit 0
+ ;;
+ m68klinux)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuaout"
+ exit 0
+ ;;
+ elf32ppc | elf32ppclinux)
+ # Determine Lib Version
+ cat >$dummy.c <<EOF
+#include <features.h>
+#if defined(__GLIBC__)
+extern char __libc_version[];
+extern char __libc_release[];
+#endif
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+#if defined(__GLIBC__)
+ printf("%s %s\n", __libc_version, __libc_release);
+#else
+ printf("unkown\n");
+#endif
+ return 0;
+}
+EOF
+ LIBC=""
+ $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ ./$dummy | grep 1\.99 > /dev/null
+ if test "$?" = 0 ; then
+ LIBC="libc1"
+ fi
+ fi
+ rm -f $dummy.c $dummy
+ echo powerpc-unknown-linux-gnu${LIBC}
+ exit 0
+ ;;
+ esac
+
+ if test "${UNAME_MACHINE}" = "alpha" ; then
+ cat <<EOF >$dummy.s
+ .data
+ \$Lformat:
+ .byte 37,100,45,37,120,10,0 # "%d-%x\n"
+
+ .text
+ .globl main
+ .align 4
+ .ent main
+ main:
+ .frame \$30,16,\$26,0
+ ldgp \$29,0(\$27)
+ .prologue 1
+ .long 0x47e03d80 # implver \$0
+ lda \$2,-1
+ .long 0x47e20c21 # amask \$2,\$1
+ lda \$16,\$Lformat
+ mov \$0,\$17
+ not \$1,\$18
+ jsr \$26,printf
+ ldgp \$29,0(\$26)
+ mov 0,\$16
+ jsr \$26,exit
+ .end main
+EOF
+ LIBC=""
+ $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ case `./$dummy` in
+ 0-0)
+ UNAME_MACHINE="alpha"
+ ;;
+ 1-0)
+ UNAME_MACHINE="alphaev5"
+ ;;
+ 1-1)
+ UNAME_MACHINE="alphaev56"
+ ;;
+ 1-101)
+ UNAME_MACHINE="alphapca56"
+ ;;
+ 2-303)
+ UNAME_MACHINE="alphaev6"
+ ;;
+ 2-307)
+ UNAME_MACHINE="alphaev67"
+ ;;
+ esac
+
+ objdump --private-headers $dummy | \
+ grep ld.so.1 > /dev/null
+ if test "$?" = 0 ; then
+ LIBC="libc1"
+ fi
+ fi
+ rm -f $dummy.s $dummy
+ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0
+ elif test "${UNAME_MACHINE}" = "mips" ; then
+ cat >$dummy.c <<EOF
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+#ifdef __MIPSEB__
+ printf ("%s-unknown-linux-gnu\n", argv[1]);
+#endif
+#ifdef __MIPSEL__
+ printf ("%sel-unknown-linux-gnu\n", argv[1]);
+#endif
+ return 0;
+}
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ elif test "${UNAME_MACHINE}" = "s390"; then
+ echo s390-ibm-linux && exit 0
+ else
+ # Either a pre-BFD a.out linker (linux-gnuoldld)
+ # or one that does not give us useful --help.
+ # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout.
+ # If ld does not provide *any* "supported emulations:"
+ # that means it is gnuoldld.
+ echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:"
+ test $? != 0 && echo "${X86CPU}-pc-linux-gnuoldld" && exit 0
+
+ case "${UNAME_MACHINE}" in
+ i?86)
+ VENDOR=pc;
+ UNAME_MACHINE=${X86CPU}
+ ;;
+ *)
+ VENDOR=unknown;
+ ;;
+ esac
+ # Determine whether the default compiler is a.out or elf
+ cat >$dummy.c <<EOF
+#include <features.h>
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+#ifdef __ELF__
+# ifdef __GLIBC__
+# if __GLIBC__ >= 2
+ printf ("%s-${VENDOR}-linux-gnu\n", argv[1]);
+# else
+ printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
+# endif
+# else
+ printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
+# endif
+#else
+ printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]);
+#endif
+ return 0;
+}
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ fi ;;
+# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. earlier versions
+# are messed up and put the nodename in both sysname and nodename.
+ i?86:DYNIX/ptx:4*:*)
+ echo i386-sequent-sysv4
+ exit 0 ;;
+ i?86:UNIX_SV:4.2MP:2.*)
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+ # I am not positive that other SVR4 systems won't match this,
+ # I just have to hope. -- rms.
+ # Use sysv4.2uw... so that sysv4* matches it.
+ echo ${X86CPU}-pc-sysv4.2uw${UNAME_VERSION}
+ exit 0 ;;
+ i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*)
+ UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+ if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+ echo ${X86CPU}-univel-sysv${UNAME_REL}
+ else
+ echo ${X86CPU}-pc-sysv${UNAME_REL}
+ fi
+ exit 0 ;;
+ i?86:*:5:7*)
+ # Fixed at (any) Pentium or better
+ UNAME_MACHINE=i586
+ if [ ${UNAME_SYSTEM} = "UnixWare" ] ; then
+ echo ${X86CPU}-sco-sysv${UNAME_RELEASE}uw${UNAME_VERSION}
+ else
+ echo ${X86CPU}-pc-sysv${UNAME_RELEASE}
+ fi
+ exit 0 ;;
+ i?86:*:3.2:*)
+ if test -f /usr/options/cb.name; then
+ UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+ echo ${X86CPU}-pc-isc$UNAME_REL
+ elif /bin/uname -X 2>/dev/null >/dev/null ; then
+ UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')`
+ (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486
+ (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \
+ && UNAME_MACHINE=i586
+ (/bin/uname -X|egrep '^Machine.*Pent ?II' >/dev/null) \
+ && UNAME_MACHINE=i686
+ (/bin/uname -X|egrep '^Machine.*Pentium Pro' >/dev/null) \
+ && UNAME_MACHINE=i686
+ echo ${X86CPU}-pc-sco$UNAME_REL
+ else
+ echo ${X86CPU}-pc-sysv32
+ fi
+ exit 0 ;;
+ i?86:*DOS:*:*)
+ echo ${X86CPU}-pc-msdosdjgpp
+ exit 0 ;;
+ pc:*:*:*)
+ # Left here for compatibility:
+ # uname -m prints for DJGPP always 'pc', but it prints nothing about
+ # the processor, so we play safe by assuming i386.
+ echo i386-pc-msdosdjgpp
+ exit 0 ;;
+ Intel:Mach:3*:*)
+ echo i386-pc-mach3
+ exit 0 ;;
+ paragon:*:*:*)
+ echo i860-intel-osf1
+ exit 0 ;;
+ i860:*:4.*:*) # i860-SVR4
+ if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+ echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+ else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
+ fi
+ exit 0 ;;
+ mini*:CTIX:SYS*5:*)
+ # "miniframe"
+ echo m68010-convergent-sysv
+ exit 0 ;;
+ M68*:*:R3V[567]*:*)
+ test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
+ 3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0)
+ OS_REL=''
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && echo i486-ncr-sysv4.3${OS_REL} && exit 0
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+ && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
+ 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && echo i486-ncr-sysv4 && exit 0 ;;
+ m68*:LynxOS:2.*:*)
+ echo m68k-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ mc68030:UNIX_System_V:4.*:*)
+ echo m68k-atari-sysv4
+ exit 0 ;;
+ i?86:LynxOS:2.*:* | i?86:LynxOS:3.[01]*:*)
+ echo i386-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ TSUNAMI:LynxOS:2.*:*)
+ echo sparc-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*)
+ echo rs6000-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ SM[BE]S:UNIX_SV:*:*)
+ echo mips-dde-sysv${UNAME_RELEASE}
+ exit 0 ;;
+ RM*:ReliantUNIX-*:*:*)
+ echo mips-sni-sysv4
+ exit 0 ;;
+ RM*:SINIX-*:*:*)
+ echo mips-sni-sysv4
+ exit 0 ;;
+ *:SINIX-*:*:*)
+ if uname -p 2>/dev/null >/dev/null ; then
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ echo ${UNAME_MACHINE}-sni-sysv4
+ else
+ echo ns32k-sni-sysv
+ fi
+ exit 0 ;;
+ PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+ exit 0 ;;
+ *:UNIX_System_V:4*:FTX*)
+ # From Gerald Hewes <hewes@openmarket.com>.
+ # How about differentiating between stratus architectures? -djm
+ echo hppa1.1-stratus-sysv4
+ exit 0 ;;
+ *:*:*:FTX*)
+ # From seanf@swdc.stratus.com.
+ echo i860-stratus-sysv4
+ exit 0 ;;
+ mc68*:A/UX:*:*)
+ echo m68k-apple-aux${UNAME_RELEASE}
+ exit 0 ;;
+ news*:NEWS-OS:*:6*)
+ echo mips-sony-newsos6
+ exit 0 ;;
+ R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+ if [ -d /usr/nec ]; then
+ echo mips-nec-sysv${UNAME_RELEASE}
+ else
+ echo mips-unknown-sysv${UNAME_RELEASE}
+ fi
+ exit 0 ;;
+ BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
+ echo powerpc-be-beos
+ exit 0 ;;
+ BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
+ echo powerpc-apple-beos
+ exit 0 ;;
+ BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos
+ exit 0 ;;
+ SX-4:SUPER-UX:*:*)
+ echo sx4-nec-superux${UNAME_RELEASE}
+ exit 0 ;;
+ SX-5:SUPER-UX:*:*)
+ echo sx5-nec-superux${UNAME_RELEASE}
+ exit 0 ;;
+ Power*:Rhapsody:*:*)
+ echo powerpc-apple-rhapsody${UNAME_RELEASE}
+ exit 0 ;;
+ *:Rhapsody:*:*)
+ echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+ exit 0 ;;
+ Power*:Mac*OS:*:*)
+ echo powerpc-apple-macos${UNAME_RELEASE}
+ exit 0 ;;
+ *:Mac*OS:*:*)
+ echo ${UNAME_MACHINE}-apple-macos${UNAME_RELEASE}
+ exit 0 ;;
+ *:Darwin:*:*)
+ echo `uname -p`-apple-darwin${UNAME_RELEASE}
+ exit 0 ;;
+ *:procnto*:*:* | *:QNX:[0123456789]*:*)
+ if test "${UNAME_MACHINE}" = "x86pc"; then
+ UNAME_MACHINE=pc
+ fi
+ echo `uname -p`-${UNAME_MACHINE}-nto-qnx
+ exit 0 ;;
+ *:QNX:*:4*)
+ echo i386-pc-qnx
+ exit 0 ;;
+ NSR-W:NONSTOP_KERNEL:*:*)
+ echo nsr-tandem-nsk${UNAME_RELEASE}
+ exit 0 ;;
+ BS2000:POSIX*:*:*)
+ echo bs2000-siemens-sysv
+ exit 0 ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+ /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+ I don't know.... */
+ printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+ printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+ "4"
+#else
+ ""
+#endif
+ ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+ printf ("arm-acorn-riscix"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+ printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+ int version;
+ version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+ if (version < 4)
+ printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+ else
+ printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+ exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+ printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+ printf ("ns32k-encore-mach\n"); exit (0);
+#else
+ printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+ printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+ printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+ printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+ struct utsname un;
+
+ uname(&un);
+
+ if (strncmp(un.version, "V2", 2) == 0) {
+ printf ("i386-sequent-ptx2\n"); exit (0);
+ }
+ if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+ printf ("i386-sequent-ptx1\n"); exit (0);
+ }
+ printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+#if !defined (ultrix)
+ printf ("vax-dec-bsd\n"); exit (0);
+#else
+ printf ("vax-dec-ultrix\n"); exit (0);
+#endif
+#endif
+
+#if defined (alliant) && defined (i860)
+ printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+ exit (1);
+}
+EOF
+
+$CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy && rm $dummy.c $dummy && exit 0
+rm -f $dummy.c $dummy
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+ case `getsysinfo -f cpu_type` in
+ c1*)
+ echo c1-convex-bsd
+ exit 0 ;;
+ c2*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit 0 ;;
+ c34*)
+ echo c34-convex-bsd
+ exit 0 ;;
+ c38*)
+ echo c38-convex-bsd
+ exit 0 ;;
+ c4*)
+ echo c4-convex-bsd
+ exit 0 ;;
+ esac
+fi
+
+#echo '(Unable to guess system type)' 1>&2
+
+exit 1
diff --git a/rts/gmp/config.in b/rts/gmp/config.in
new file mode 100644
index 0000000000..8b2546ef16
--- /dev/null
+++ b/rts/gmp/config.in
@@ -0,0 +1,162 @@
+/* config.in. Generated automatically from configure.in by autoheader. */
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+
+/* Define if a limb is long long. */
+#undef _LONG_LONG_LIMB
+
+/* Define if we have native implementation of function. */
+#undef HAVE_NATIVE_
+#undef HAVE_NATIVE_mpn_add
+#undef HAVE_NATIVE_mpn_add_1
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addmul_1
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addsub_n
+#undef HAVE_NATIVE_mpn_addsub_nc
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdivmod
+#undef HAVE_NATIVE_mpn_cmp
+#undef HAVE_NATIVE_mpn_com_n
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_divrem_newton
+#undef HAVE_NATIVE_mpn_divrem_classic
+#undef HAVE_NATIVE_mpn_dump
+#undef HAVE_NATIVE_mpn_gcd
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_gcdext
+#undef HAVE_NATIVE_mpn_get_str
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mul
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_mul_n
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_perfect_square_p
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_random2
+#undef HAVE_NATIVE_mpn_random
+#undef HAVE_NATIVE_mpn_rawrandom
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_scan0
+#undef HAVE_NATIVE_mpn_scan1
+#undef HAVE_NATIVE_mpn_set_str
+#undef HAVE_NATIVE_mpn_sqrtrem
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sub
+#undef HAVE_NATIVE_mpn_sub_1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_submul_1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_udiv_w_sdiv
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n
+
+/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
+ */
+#undef HAVE_DECL_OPTARG
+
+/* ./configure --enable-assert option, to enable some ASSERT()s */
+#undef WANT_ASSERT
+
+/* Define if you have the <sys/sysctl.h> header file. */
+#undef HAVE_SYS_SYSCTL_H
+
+/* Define if you have the `strtoul' function. */
+#undef HAVE_STRTOUL
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define if you have the `sysctlbyname' function. */
+#undef HAVE_SYSCTLBYNAME
+
+/* Define if the system has the type `void'. */
+#undef HAVE_VOID
+
+/* Define if you have the `popen' function. */
+#undef HAVE_POPEN
+
+/* ./configure --disable-alloca option, to use stack-alloc.c, not alloca */
+#undef USE_STACK_ALLOC
+
+/* Define if cpp supports the ANSI # stringizing operator. */
+#undef HAVE_STRINGIZE
+
+/* Define if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define if you have the `processor_info' function. */
+#undef HAVE_PROCESSOR_INFO
+
+/* Version number of package */
+#undef VERSION
+
+/* Define if you have the `getopt_long' function. */
+#undef HAVE_GETOPT_LONG
+
+/* Define if you have the <getopt.h> header file. */
+#undef HAVE_GETOPT_H
+
+/* Define if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define if a speed_cyclecounter exists (for the tune programs) */
+#undef HAVE_SPEED_CYCLECOUNTER
+
+/* Define if mpn/tests has calling conventions checking for the CPU */
+#undef HAVE_CALLING_CONVENTIONS
+
+/* ./configure --enable-fft option, to enable FFTs for multiplication */
+#undef WANT_FFT
+
+/* Define if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
diff --git a/rts/gmp/config.sub b/rts/gmp/config.sub
new file mode 100644
index 0000000000..c4123f28ff
--- /dev/null
+++ b/rts/gmp/config.sub
@@ -0,0 +1,1273 @@
+#! /bin/sh
+# Configuration validation subroutine script, version 1.1.
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+# Free Software Foundation, Inc.
+#
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine. It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Written by Per Bothner <bothner@cygnus.com>.
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support. The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+if [ x$1 = x ]
+then
+ echo Configuration name missing. 1>&2
+ echo "Usage: $0 CPU-MFR-OPSYS" 1>&2
+ echo "or $0 ALIAS" 1>&2
+ echo where ALIAS is a recognized configuration type. 1>&2
+ exit 1
+fi
+
+# First pass through any local machine types.
+case $1 in
+ *local*)
+ echo $1
+ exit 0
+ ;;
+ *)
+ ;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+ nto-qnx* | linux-gnu*)
+ os=-$maybe_os
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+ ;;
+ *)
+ basic_machine=`echo $1 | sed 's/-[^-]*$//'`
+ if [ $basic_machine != $1 ]
+ then os=`echo $1 | sed 's/.*-/-/'`
+ else os=; fi
+ ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work. We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+ -sun*os*)
+ # Prevent following clause from handling this invalid input.
+ ;;
+ -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+ -apple)
+ os=
+ basic_machine=$1
+ ;;
+ -sim | -cisco | -oki | -wec | -winbond)
+ os=
+ basic_machine=$1
+ ;;
+ -scout)
+ ;;
+ -wrs)
+ os=-vxworks
+ basic_machine=$1
+ ;;
+ -hiux*)
+ os=-hiuxwe2
+ ;;
+ -sco5)
+ os=-sco3.2v5
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco4)
+ os=-sco3.2v4
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2.[4-9]*)
+ os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2v[4-9]*)
+ # Don't forget version if it is 3.2v4 or newer.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco*)
+ os=-sco3.2v2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -udk*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -isc)
+ os=-isc2.2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -clix*)
+ basic_machine=clipper-intergraph
+ ;;
+ -isc*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -lynx*)
+ os=-lynxos
+ ;;
+ -ptx*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+ ;;
+ -windowsnt*)
+ os=`echo $os | sed -e 's/windowsnt/winnt/'`
+ ;;
+ -psos*)
+ os=-psos
+ ;;
+ -mint | -mint[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+ # Recognize the basic CPU types without company name.
+ # Some are omitted here because they have special meanings below.
+ tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \
+ | arme[lb] | pyramid | mn10200 | mn10300 | tron | a29k \
+ | 580 | i960 | h8300 \
+ | x86 | ppcbe | mipsbe | mipsle | shbe | shle | armbe | armle \
+ | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \
+ | alpha | alphaev[4-8] | alphaev56 | alphapca5[67] \
+ | alphaev6[78] \
+ | we32k | ns16k | clipper | i370 | sh | powerpc | powerpcle \
+ | 1750a | dsp16xx | pdp11 | mips16 | mips64 | mipsel | mips64el \
+ | mips64orion | mips64orionel | mipstx39 | mipstx39el \
+ | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \
+ | mips64vr5000 | miprs64vr5000el | mcore \
+ | sparc | sparclet | sparclite | sparc64 | sparcv9 | v850 | c4x \
+ | powerpc64 | sparcv8 | supersparc | microsparc | ultrasparc \
+ | thumb | d10v | fr30 | avr)
+ basic_machine=$basic_machine-unknown
+ ;;
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | z8k | v70 | h8500 | w65 | pj | pjl)
+ ;;
+
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+ # (2) the word "unknown" tends to confuse beginning users.
+ i[34567]86 | pentium[23] | k[56] | k6[23] | athlon)
+ basic_machine=$basic_machine-pc
+ ;;
+ # Object if more than one company name word.
+ *-*-*)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+ # Recognize the basic CPU types with company name.
+ vax-* | tahoe-* | i[34567]86-* | pentium[23]-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \
+ | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \
+ | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \
+ | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \
+ | xmp-* | ymp-* \
+ | x86-* | ppcbe-* | mipsbe-* | mipsle-* | shbe-* | shle-* | armbe-* | armle-* \
+ | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* | hppa2.0n-* \
+ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphapca5[67]-* \
+ | alphaev6[78]-* \
+ | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \
+ | clipper-* | orion-* \
+ | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \
+ | sparc64-* | sparcv9-* | sparc86x-* | mips16-* | mips64-* | mipsel-* \
+ | mips64el-* | mips64orion-* | mips64orionel-* \
+ | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \
+ | mipstx39-* | mipstx39el-* | mcore-* \
+ | f301-* | armv*-* | s390-* | sv1-* | t3e-* \
+ | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \
+ | k[56]-* | k6[23]-* | athlon-* | powerpc64-* \
+ | sparcv8-* | supersparc-* | microsparc-* | ultrasparc-* \
+ | thumb-* | v850-* | d30v-* | tic30-* | c30-* | fr30-* )
+ ;;
+ # Recognize the various machine names and aliases which stand
+ # for a CPU type and a company and sometimes even an OS.
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+ basic_machine=m68000-att
+ ;;
+ 3b*)
+ basic_machine=we32k-att
+ ;;
+ a29khif)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ os=-scout
+ ;;
+ alliant | fx80)
+ basic_machine=fx80-alliant
+ ;;
+ altos | altos3068)
+ basic_machine=m68k-altos
+ ;;
+ am29k)
+ basic_machine=a29k-none
+ os=-bsd
+ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ os=-sysv
+ ;;
+ amiga | amiga-*)
+ basic_machine=m68k-cbm
+ ;;
+ amigaos | amigados)
+ basic_machine=m68k-cbm
+ os=-amigaos
+ ;;
+ amigaunix | amix)
+ basic_machine=m68k-cbm
+ os=-sysv4
+ ;;
+ apollo68)
+ basic_machine=m68k-apollo
+ os=-sysv
+ ;;
+ apollo68bsd)
+ basic_machine=m68k-apollo
+ os=-bsd
+ ;;
+ aux)
+ basic_machine=m68k-apple
+ os=-aux
+ ;;
+ balance)
+ basic_machine=ns32k-sequent
+ os=-dynix
+ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ os=-bsd
+ ;;
+ convex-c2)
+ basic_machine=c2-convex
+ os=-bsd
+ ;;
+ convex-c32)
+ basic_machine=c32-convex
+ os=-bsd
+ ;;
+ convex-c34)
+ basic_machine=c34-convex
+ os=-bsd
+ ;;
+ convex-c38)
+ basic_machine=c38-convex
+ os=-bsd
+ ;;
+ cray | ymp)
+ basic_machine=ymp-cray
+ os=-unicos
+ ;;
+ cray2)
+ basic_machine=cray2-cray
+ os=-unicos
+ ;;
+ [ctj]90-cray)
+ basic_machine=c90-cray
+ os=-unicos
+ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ ;;
+ da30 | da30-*)
+ basic_machine=m68k-da30
+ ;;
+ decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+ basic_machine=mips-dec
+ ;;
+ delta | 3300 | motorola-3300 | motorola-delta \
+ | 3300-motorola | delta-motorola)
+ basic_machine=m68k-motorola
+ ;;
+ delta88)
+ basic_machine=m88k-motorola
+ os=-sysv3
+ ;;
+ dpx20 | dpx20-*)
+ basic_machine=rs6000-bull
+ os=-bosx
+ ;;
+ dpx2* | dpx2*-bull)
+ basic_machine=m68k-bull
+ os=-sysv3
+ ;;
+ ebmon29k)
+ basic_machine=a29k-amd
+ os=-ebmon
+ ;;
+ elxsi)
+ basic_machine=elxsi-elxsi
+ os=-bsd
+ ;;
+ encore | umax | mmax)
+ basic_machine=ns32k-encore
+ ;;
+ es1800 | OSE68k | ose68k | ose | OSE)
+ basic_machine=m68k-ericsson
+ os=-ose
+ ;;
+ fx2800)
+ basic_machine=i860-alliant
+ ;;
+ genix)
+ basic_machine=ns32k-ns
+ ;;
+ gmicro)
+ basic_machine=tron-gmicro
+ os=-sysv
+ ;;
+ h3050r* | hiux*)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ h8300hms)
+ basic_machine=h8300-hitachi
+ os=-hms
+ ;;
+ h8300xray)
+ basic_machine=h8300-hitachi
+ os=-xray
+ ;;
+ h8500hms)
+ basic_machine=h8500-hitachi
+ os=-hms
+ ;;
+ harris)
+ basic_machine=m88k-harris
+ os=-sysv3
+ ;;
+ hp300-*)
+ basic_machine=m68k-hp
+ ;;
+ hp300bsd)
+ basic_machine=m68k-hp
+ os=-bsd
+ ;;
+ hp300hpux)
+ basic_machine=m68k-hp
+ os=-hpux
+ ;;
+ hp3k9[0-9][0-9] | hp9[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k2[0-9][0-9] | hp9k31[0-9])
+ basic_machine=m68000-hp
+ ;;
+ hp9k3[2-9][0-9])
+ basic_machine=m68k-hp
+ ;;
+ hp9k6[0-9][0-9] | hp6[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k7[0-79][0-9] | hp7[0-79][0-9])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k78[0-9] | hp78[0-9])
+ basic_machine=hppa2.0-hp
+ ;;
+ hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+ basic_machine=hppa2.0-hp
+ ;;
+ hp9k8[0-9][13679] | hp8[0-9][13679])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[0-9][0-9] | hp8[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hppa-next)
+ os=-nextstep3
+ ;;
+ hppaosf)
+ basic_machine=hppa1.1-hp
+ os=-osf
+ ;;
+ hppro)
+ basic_machine=hppa1.1-hp
+ os=-proelf
+ ;;
+ i370-ibm* | ibm*)
+ basic_machine=i370-ibm
+ ;;
+# I'm not sure what "Sysv32" means. Should this be sysv3.2?
+ i[34567]86v32)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv32
+ ;;
+ i[34567]86v4*)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv4
+ ;;
+ i[34567]86v)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv
+ ;;
+ i[34567]86sol2)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-solaris2
+ ;;
+ i386mach)
+ basic_machine=i386-mach
+ os=-mach
+ ;;
+ i386-vsta | vsta)
+ basic_machine=i386-unknown
+ os=-vsta
+ ;;
+ i386-go32 | go32)
+ basic_machine=i386-unknown
+ os=-go32
+ ;;
+ i386-mingw32 | mingw32)
+ basic_machine=i386-unknown
+ os=-mingw32
+ ;;
+ iris | iris4d)
+ basic_machine=mips-sgi
+ case $os in
+ -irix*)
+ ;;
+ *)
+ os=-irix4
+ ;;
+ esac
+ ;;
+ isi68 | isi)
+ basic_machine=m68k-isi
+ os=-sysv
+ ;;
+ macppc*)
+ basic_machine=powerpc-apple
+ ;;
+ m88k-omron*)
+ basic_machine=m88k-omron
+ ;;
+ magnum | m3230)
+ basic_machine=mips-mips
+ os=-sysv
+ ;;
+ merlin)
+ basic_machine=ns32k-utek
+ os=-sysv
+ ;;
+ miniframe)
+ basic_machine=m68000-convergent
+ ;;
+ *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+ mipsel*-linux*)
+ basic_machine=mipsel-unknown
+ os=-linux-gnu
+ ;;
+ mips*-linux*)
+ basic_machine=mips-unknown
+ os=-linux-gnu
+ ;;
+ mips3*-*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+ ;;
+ mips3*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+ ;;
+ mmix*)
+ basic_machine=mmix-knuth
+ os=-mmixware
+ ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ msdos)
+ basic_machine=i386-unknown
+ os=-msdos
+ ;;
+ mvs)
+ basic_machine=i370-ibm
+ os=-mvs
+ ;;
+ ncr3000)
+ basic_machine=i486-ncr
+ os=-sysv4
+ ;;
+ netbsd386)
+ basic_machine=i386-unknown
+ os=-netbsd
+ ;;
+ netwinder)
+ basic_machine=armv4l-rebel
+ os=-linux
+ ;;
+ news | news700 | news800 | news900)
+ basic_machine=m68k-sony
+ os=-newsos
+ ;;
+ news1000)
+ basic_machine=m68030-sony
+ os=-newsos
+ ;;
+ news-3600 | risc-news)
+ basic_machine=mips-sony
+ os=-newsos
+ ;;
+ necv70)
+ basic_machine=v70-nec
+ os=-sysv
+ ;;
+ next | m*-next )
+ basic_machine=m68k-next
+ case $os in
+ -nextstep* )
+ ;;
+ -ns2*)
+ os=-nextstep2
+ ;;
+ *)
+ os=-nextstep3
+ ;;
+ esac
+ ;;
+ nh3000)
+ basic_machine=m68k-harris
+ os=-cxux
+ ;;
+ nh[45]000)
+ basic_machine=m88k-harris
+ os=-cxux
+ ;;
+ nindy960)
+ basic_machine=i960-intel
+ os=-nindy
+ ;;
+ mon960)
+ basic_machine=i960-intel
+ os=-mon960
+ ;;
+ np1)
+ basic_machine=np1-gould
+ ;;
+ nsr-tandem)
+ basic_machine=nsr-tandem
+ ;;
+ op50n-* | op60c-*)
+ basic_machine=hppa1.1-oki
+ os=-proelf
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+ os=-ose
+ ;;
+ os68k)
+ basic_machine=m68k-none
+ os=-os68k
+ ;;
+ pa-hitachi)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ paragon)
+ basic_machine=i860-intel
+ os=-osf
+ ;;
+ pbd)
+ basic_machine=sparc-tti
+ ;;
+ pbb)
+ basic_machine=m68k-tti
+ ;;
+ pc532 | pc532-*)
+ basic_machine=ns32k-pc532
+ ;;
+ pentiummmx | p55)
+ basic_machine=pentiummmx-pc
+ ;;
+ pentium | p5 | i586)
+ basic_machine=pentium-pc
+ ;;
+ pentiumpro | p6)
+ basic_machine=pentiumpro-pc
+ ;;
+ pentiummmx-* | p55-*)
+ basic_machine=pentiummmx-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentium-* | p5-* | i586-*)
+ basic_machine=pentium-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumpro-* | p6-*)
+ basic_machine=pentiumpro-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ nexen)
+ # We don't have specific support for Nexgen yet, so just call it a Pentium
+ basic_machine=i586-nexgen
+ ;;
+ pn)
+ basic_machine=pn-gould
+ ;;
+ power) basic_machine=rs6000-ibm
+ ;;
+ ppc) basic_machine=powerpc-unknown
+ ;;
+ ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppc64) basic_machine=powerpc64-unknown
+ ;;
+ ppc64-*)
+ basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppcle | powerpclittle | ppc-le | powerpc-little)
+ basic_machine=powerpcle-unknown
+ ;;
+ ppcle-* | powerpclittle-*)
+ basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ps2)
+ basic_machine=i386-ibm
+ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ rm[46]00)
+ basic_machine=mips-siemens
+ ;;
+ rtpc | rtpc-*)
+ basic_machine=romp-ibm
+ ;;
+ sa29200)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ sequent)
+ basic_machine=i386-sequent
+ ;;
+ sh)
+ basic_machine=sh-hitachi
+ os=-hms
+ ;;
+ sparclite-wrs)
+ basic_machine=sparclite-wrs
+ os=-vxworks
+ ;;
+ sps7)
+ basic_machine=m68k-bull
+ os=-sysv2
+ ;;
+ spur)
+ basic_machine=spur-unknown
+ ;;
+ st2000)
+ basic_machine=m68k-tandem
+ ;;
+ stratus)
+ basic_machine=i860-stratus
+ os=-sysv4
+ ;;
+ sun2)
+ basic_machine=m68000-sun
+ ;;
+ sun2os3)
+ basic_machine=m68000-sun
+ os=-sunos3
+ ;;
+ sun2os4)
+ basic_machine=m68000-sun
+ os=-sunos4
+ ;;
+ sun3os3)
+ basic_machine=m68k-sun
+ os=-sunos3
+ ;;
+ sun3os4)
+ basic_machine=m68k-sun
+ os=-sunos4
+ ;;
+ sun4os3)
+ basic_machine=sparc-sun
+ os=-sunos3
+ ;;
+ sun4os4)
+ basic_machine=sparc-sun
+ os=-sunos4
+ ;;
+ sun4sol2)
+ basic_machine=sparc-sun
+ os=-solaris2
+ ;;
+ sun3 | sun3-*)
+ basic_machine=m68k-sun
+ ;;
+ sun4)
+ basic_machine=sparc-sun
+ ;;
+ sun386 | sun386i | roadrunner)
+ basic_machine=i386-sun
+ ;;
+ sv1)
+ basic_machine=sv1-cray
+ os=-unicos
+ ;;
+ symmetry)
+ basic_machine=i386-sequent
+ os=-dynix
+ ;;
+ t3e)
+ basic_machine=t3e-cray
+ os=-unicos
+ ;;
+ tx39)
+ basic_machine=mipstx39-unknown
+ ;;
+ tx39el)
+ basic_machine=mipstx39el-unknown
+ ;;
+ tower | tower-32)
+ basic_machine=m68k-ncr
+ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ ultra3)
+ basic_machine=a29k-nyu
+ os=-sym1
+ ;;
+ v810 | necv810)
+ basic_machine=v810-nec
+ os=-none
+ ;;
+ vaxv)
+ basic_machine=vax-dec
+ os=-sysv
+ ;;
+ vms)
+ basic_machine=vax-dec
+ os=-vms
+ ;;
+ vpp*|vx|vx-*)
+ basic_machine=f301-fujitsu
+ ;;
+ vxworks960)
+ basic_machine=i960-wrs
+ os=-vxworks
+ ;;
+ vxworks68)
+ basic_machine=m68k-wrs
+ os=-vxworks
+ ;;
+ vxworks29k)
+ basic_machine=a29k-wrs
+ os=-vxworks
+ ;;
+ w65*)
+ basic_machine=w65-wdc
+ os=-none
+ ;;
+ w89k-*)
+ basic_machine=hppa1.1-winbond
+ os=-proelf
+ ;;
+ xmp)
+ basic_machine=xmp-cray
+ os=-unicos
+ ;;
+ xps | xps100)
+ basic_machine=xps100-honeywell
+ ;;
+ z8k-*-coff)
+ basic_machine=z8k-unknown
+ os=-sim
+ ;;
+ none)
+ basic_machine=none-none
+ os=-none
+ ;;
+
+# Here we handle the default manufacturer of certain CPU types. It is in
+# some cases the only manufacturer, in others, it is the most popular.
+ w89k)
+ basic_machine=hppa1.1-winbond
+ ;;
+ op50n)
+ basic_machine=hppa1.1-oki
+ ;;
+ op60c)
+ basic_machine=hppa1.1-oki
+ ;;
+ mips)
+ if [ x$os = x-linux-gnu ]; then
+ basic_machine=mips-unknown
+ else
+ basic_machine=mips-mips
+ fi
+ ;;
+ romp)
+ basic_machine=romp-ibm
+ ;;
+ rs6000)
+ basic_machine=rs6000-ibm
+ ;;
+ vax)
+ basic_machine=vax-dec
+ ;;
+ pdp11)
+ basic_machine=pdp11-dec
+ ;;
+ we32k)
+ basic_machine=we32k-att
+ ;;
+ sparc | sparcv9)
+ basic_machine=sparc-sun
+ ;;
+ cydra)
+ basic_machine=cydra-cydrome
+ ;;
+ orion)
+ basic_machine=orion-highlevel
+ ;;
+ orion105)
+ basic_machine=clipper-highlevel
+ ;;
+ mac | mpw | mac-mpw)
+ basic_machine=m68k-apple
+ ;;
+ pmac | pmac-mpw)
+ basic_machine=powerpc-apple
+ ;;
+ c4x*)
+ basic_machine=c4x-none
+ os=-coff
+ ;;
+ *)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers.
+case $basic_machine in
+ *-digital*)
+ basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+ ;;
+ *-commodore*)
+ basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+ ;;
+ *)
+ ;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+ # First match some system type aliases
+ # that might get confused with valid system types.
+ # -solaris* is a basic system type, with this one exception.
+ -solaris1 | -solaris1.*)
+ os=`echo $os | sed -e 's|solaris1|sunos4|'`
+ ;;
+ -solaris)
+ os=-solaris2
+ ;;
+ -svr4*)
+ os=-sysv4
+ ;;
+ -unixware*)
+ os=-sysv4.2uw
+ ;;
+ -gnu/linux*)
+ os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+ ;;
+ # First accept the basic system types.
+ # The portable systems comes first.
+ # Each alternative MUST END IN A *, to match a version number.
+ # -sysv* is not here because it comes later, after sysvr4.
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+ | -aos* \
+ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+ | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
+ | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+ | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \
+ | -openstep* | -oskit*)
+ # Remember, each alternative MUST END IN *, to match a version number.
+ ;;
+ -qnx*)
+ case $basic_machine in
+ x86-* | i[34567]86-*)
+ ;;
+ *)
+ os=-nto$os
+ ;;
+ esac
+ ;;
+ -nto*)
+ os=-nto-qnx
+ ;;
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
+ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ ;;
+ -mac*)
+ os=`echo $os | sed -e 's|mac|macos|'`
+ ;;
+ -linux*)
+ os=`echo $os | sed -e 's|linux|linux-gnu|'`
+ ;;
+ -sunos5*)
+ os=`echo $os | sed -e 's|sunos5|solaris2|'`
+ ;;
+ -sunos6*)
+ os=`echo $os | sed -e 's|sunos6|solaris3|'`
+ ;;
+ -opened*)
+ os=-openedition
+ ;;
+ -wince*)
+ os=-wince
+ ;;
+ -osfrose*)
+ os=-osfrose
+ ;;
+ -osf*)
+ os=-osf
+ ;;
+ -utek*)
+ os=-bsd
+ ;;
+ -dynix*)
+ os=-bsd
+ ;;
+ -acis*)
+ os=-aos
+ ;;
+ -386bsd)
+ os=-bsd
+ ;;
+ -ctix* | -uts*)
+ os=-sysv
+ ;;
+ -ns2 )
+ os=-nextstep2
+ ;;
+ -nsk)
+ os=-nsk
+ ;;
+ # Preserve the version number of sinix5.
+ -sinix5.*)
+ os=`echo $os | sed -e 's|sinix|sysv|'`
+ ;;
+ -sinix*)
+ os=-sysv4
+ ;;
+ -triton*)
+ os=-sysv3
+ ;;
+ -oss*)
+ os=-sysv3
+ ;;
+ -svr4)
+ os=-sysv4
+ ;;
+ -svr3)
+ os=-sysv3
+ ;;
+ -sysvr4)
+ os=-sysv4
+ ;;
+ # This must come after -sysvr4.
+ -sysv*)
+ ;;
+ -ose*)
+ os=-ose
+ ;;
+ -es1800*)
+ os=-ose
+ ;;
+ -xenix)
+ os=-xenix
+ ;;
+ -*mint | -*MiNT)
+ os=-mint
+ ;;
+ -none)
+ ;;
+ *)
+ # Get rid of the `-' at the beginning of $os.
+ os=`echo $os | sed 's/[^-]*-//'`
+ echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or put another way, the most popular os provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system. Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+ *-acorn)
+ os=-riscix1.2
+ ;;
+ arm*-rebel)
+ os=-linux
+ ;;
+ arm*-semi)
+ os=-aout
+ ;;
+ pdp11-*)
+ os=-none
+ ;;
+ *-dec | vax-*)
+ os=-ultrix4.2
+ ;;
+ m68*-apollo)
+ os=-domain
+ ;;
+ i386-sun)
+ os=-sunos4.0.2
+ ;;
+ m68000-sun)
+ os=-sunos3
+ # This also exists in the configure program, but was not the
+ # default.
+ # os=-sunos4
+ ;;
+ m68*-cisco)
+ os=-aout
+ ;;
+ mips*-cisco)
+ os=-elf
+ ;;
+ mips*-*)
+ os=-elf
+ ;;
+ *-tti) # must be before sparc entry or we get the wrong os.
+ os=-sysv3
+ ;;
+ sparc-* | *-sun)
+ os=-sunos4.1.1
+ ;;
+ *-be)
+ os=-beos
+ ;;
+ *-ibm)
+ os=-aix
+ ;;
+ *-wec)
+ os=-proelf
+ ;;
+ *-winbond)
+ os=-proelf
+ ;;
+ *-oki)
+ os=-proelf
+ ;;
+ *-hp)
+ os=-hpux
+ ;;
+ *-hitachi)
+ os=-hiux
+ ;;
+ i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+ os=-sysv
+ ;;
+ *-cbm)
+ os=-amigaos
+ ;;
+ *-dg)
+ os=-dgux
+ ;;
+ *-dolphin)
+ os=-sysv3
+ ;;
+ m68k-ccur)
+ os=-rtu
+ ;;
+ m88k-omron*)
+ os=-luna
+ ;;
+ *-next )
+ os=-nextstep
+ ;;
+ *-sequent)
+ os=-ptx
+ ;;
+ *-crds)
+ os=-unos
+ ;;
+ *-ns)
+ os=-genix
+ ;;
+ i370-*)
+ os=-mvs
+ ;;
+ *-next)
+ os=-nextstep3
+ ;;
+ *-gould)
+ os=-sysv
+ ;;
+ *-highlevel)
+ os=-bsd
+ ;;
+ *-encore)
+ os=-bsd
+ ;;
+ *-sgi)
+ os=-irix
+ ;;
+ *-siemens)
+ os=-sysv4
+ ;;
+ *-masscomp)
+ os=-rtu
+ ;;
+ f301-fujitsu)
+ os=-uxpv
+ ;;
+ *-rom68k)
+ os=-coff
+ ;;
+ *-*bug)
+ os=-coff
+ ;;
+ *-apple)
+ os=-macos
+ ;;
+ *-atari*)
+ os=-mint
+ ;;
+ *)
+ os=-none
+ ;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer. We pick the logical manufacturer.
+vendor=unknown
+case $basic_machine in
+ *-unknown)
+ case $os in
+ -riscix*)
+ vendor=acorn
+ ;;
+ -sunos*)
+ vendor=sun
+ ;;
+ -aix*)
+ vendor=ibm
+ ;;
+ -beos*)
+ vendor=be
+ ;;
+ -hpux*)
+ vendor=hp
+ ;;
+ -mpeix*)
+ vendor=hp
+ ;;
+ -hiux*)
+ vendor=hitachi
+ ;;
+ -unos*)
+ vendor=crds
+ ;;
+ -dgux*)
+ vendor=dg
+ ;;
+ -luna*)
+ vendor=omron
+ ;;
+ -genix*)
+ vendor=ns
+ ;;
+ -mvs* | -opened*)
+ vendor=ibm
+ ;;
+ -ptx*)
+ vendor=sequent
+ ;;
+ -vxsim* | -vxworks*)
+ vendor=wrs
+ ;;
+ -aux*)
+ vendor=apple
+ ;;
+ -hms*)
+ vendor=hitachi
+ ;;
+ -mpw* | -macos*)
+ vendor=apple
+ ;;
+ -*mint | -*MiNT)
+ vendor=atari
+ ;;
+ esac
+ basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
+ ;;
+esac
+
+echo $basic_machine$os
diff --git a/rts/gmp/configure b/rts/gmp/configure
new file mode 100644
index 0000000000..8294680486
--- /dev/null
+++ b/rts/gmp/configure
@@ -0,0 +1,5216 @@
+#! /bin/sh
+# From configure.in Revision: 1.129.2.2
+# Guess values for system-dependent variables and create Makefiles.
+# Generated automatically using Autoconf version 2.14a.
+# Copyright (C) 1992, 93, 94, 95, 96, 98, 99, 2000
+# Free Software Foundation, Inc.
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+# Defaults:
+ac_default_prefix=/usr/local
+# Any additions from configure.in:
+
+# Initialize some variables set by options.
+ac_init_help=false
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+build=NONE
+cache_file=./config.cache
+exec_prefix=NONE
+host=NONE
+no_create=
+nonopt=NONE
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+target=NONE
+verbose=
+x_includes=NONE
+x_libraries=NONE
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+# Initialize some other variables.
+subdirs=
+MFLAGS= MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+# Maximum number of lines to put in a shell here document.
+: ${ac_max_here_lines=48}
+# Sed expression to map a string onto a valid sh and CPP variable names.
+ac_tr_sh='sed -e y%*+%pp%;s%[^a-zA-Z0-9_]%_%g'
+ac_tr_cpp='sed -e y%*abcdefghijklmnopqrstuvwxyz%PABCDEFGHIJKLMNOPQRSTUVWXYZ%;s%[^A-Z0-9_]%_%g'
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ ac_optarg=`echo "$ac_option" | sed -n 's/^[^=]*=//p'`
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case "$ac_option" in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir="$ac_optarg" ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build="$ac_optarg" ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file="$ac_optarg" ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir="$ac_optarg" ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`echo "$ac_option"|sed -e 's/-*disable-//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_feature" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid feature: $ac_feature" 1>&2; exit 1; }
+ fi
+ ac_feature=`echo $ac_feature| sed 's/-/_/g'`
+ eval "enable_${ac_feature}=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`echo "$ac_option"|sed -e 's/-*enable-//' -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_feature" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid feature: $ac_feature" 1>&2; exit 1; }
+ fi
+ ac_feature=`echo $ac_feature| sed 's/-/_/g'`
+ case "$ac_option" in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_${ac_feature}='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix="$ac_optarg" ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=: ;;
+ -host | --host | --hos | --ho)
+ ac_prev=host ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host="$ac_optarg" ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir="$ac_optarg" ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir="$ac_optarg" ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir="$ac_optarg" ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir="$ac_optarg" ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir="$ac_optarg" ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir="$ac_optarg" ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir="$ac_optarg" ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix="$ac_optarg" ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix="$ac_optarg" ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix="$ac_optarg" ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name="$ac_optarg" ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir="$ac_optarg" ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir="$ac_optarg" ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site="$ac_optarg" ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir="$ac_optarg" ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir="$ac_optarg" ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target="$ac_optarg" ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`echo "$ac_option"|sed -e 's/-*with-//' -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_package" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid package: $ac_package" 1>&2; exit 1; }
+ fi
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case "$ac_option" in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_${ac_package}='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`echo "$ac_option"|sed -e 's/-*without-//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_package" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid package: $ac_package" 1>&2; exit 1; }
+ fi
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ eval "with_${ac_package}=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes="$ac_optarg" ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries="$ac_optarg" ;;
+
+ -*) { echo "configure: error: unrecognized option: $ac_option
+Try \`configure --help' for more information." 1>&2; exit 1; }
+ ;;
+
+ *=*)
+ ac_envvar=`echo "$ac_option" | sed -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_envvar" | grep '[^a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid variable name: $ac_envvar" 1>&2; exit 1; }
+ fi
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+ eval "$ac_envvar='$ac_optarg'"
+ export $ac_envvar ;;
+
+ *)
+ if echo "$ac_option" | grep '[^-a-zA-Z0-9.]' >/dev/null 2>&1; then
+ echo "configure: warning: invalid host type: $ac_option" 1>&2
+ fi
+ if test "x$nonopt" != xNONE; then
+ { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; }
+ fi
+ nonopt="$ac_option"
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ { echo "configure: error: missing argument to --\`echo $ac_prev | sed 's/_/-/g'\`" 1>&2; exit 1; }
+fi
+if $ac_init_help; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<\EOF
+`configure' configures software source code packages to adapt to many kinds
+of systems.
+
+Usage: configure [OPTION]... [VAR=VALUE]... [HOST]
+
+To safely assign special values to environment variables (e.g., CC,
+CFLAGS...), give to `configure' the definition as VAR=VALUE.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help print this message
+ -V, --version print the version of autoconf that created configure
+ -q, --quiet, --silent do not print `checking...' messages
+ --cache-file=FILE cache test results in FILE
+ -n, --no-create do not create output files
+
+EOF
+
+ cat <<EOF
+Directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [same as prefix]
+ --bindir=DIR user executables in DIR [EPREFIX/bin]
+ --sbindir=DIR system admin executables in DIR [EPREFIX/sbin]
+ --libexecdir=DIR program executables in DIR [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data in DIR
+ [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data in DIR
+ [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var]
+ --libdir=DIR object code libraries in DIR [EPREFIX/lib]
+ --includedir=DIR C header files in DIR [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include]
+ --infodir=DIR info documentation in DIR [PREFIX/info]
+ --mandir=DIR man documentation in DIR [PREFIX/man]
+ --srcdir=DIR find the sources in DIR [configure dir or ..]
+EOF
+
+ cat <<\EOF
+
+Host type:
+ --build=BUILD configure for building on BUILD [BUILD=HOST]
+ --host=HOST configure for HOST [guessed]
+ --target=TARGET configure for TARGET [TARGET=HOST]
+EOF
+
+ cat <<\EOF
+
+Program names:
+ --program-prefix=PREFIX prepend PREFIX to installed program names
+ --program-suffix=SUFFIX append SUFFIX to installed program names
+ --program-transform-name=PROGRAM run sed PROGRAM on installed program names
+
+Optional Features:
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE=ARG include FEATURE ARG=yes
+ --disable-dependency-tracking Speeds up one-time builds
+ --enable-dependency-tracking Do not reject slow dependency extractors
+ --enable-maintainer-mode enable make rules and dependencies not useful
+ (and sometimes confusing) to the casual installer
+ --enable-assert enable ASSERT checking default=no
+ --enable-alloca use alloca for temp space default=yes
+ --enable-fft enable FFTs for multiplication default=no
+ --enable-mpbsd build Berkley MP compatibility library default=no
+ --enable-mpfr build MPFR default=no
+ --enable-shared=PKGS build shared libraries default=yes
+ --enable-static=PKGS build static libraries default=yes
+ --enable-fast-install=PKGS optimize for fast installation default=yes
+ --disable-libtool-lock avoid locking (might break parallel builds)
+
+Optional Packages:
+ --with-PACKAGE=ARG use PACKAGE ARG=yes
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-gnu-ld assume the C compiler uses GNU ld default=no
+ --with-pic try to use only PIC/non-PIC objects default=use both
+EOF
+ exit 0
+fi
+if $ac_init_version; then
+ cat <<\EOF
+Generated automatically using Autoconf version 2.14a.
+Copyright (C) 1992, 93, 94, 95, 96, 98, 99, 2000
+Free Software Foundation, Inc.
+
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+EOF
+ exit 0
+fi
+trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Also quote any args containing shell meta-characters.
+ac_configure_args=
+for ac_arg
+do
+ case "$ac_arg" in
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c) ;;
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"`
+ ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ *) ac_configure_args="$ac_configure_args $ac_arg" ;;
+ esac
+done
+
+# File descriptor usage:
+# 0 standard input
+# 1 file creation
+# 2 errors and warnings
+# 3 some systems may open it to /dev/tty
+# 4 used on the Kubota Titan
+# 6 checking for... messages and results
+# 5 compiler messages saved in config.log
+if test "$silent" = yes; then
+ exec 6>/dev/null
+else
+ exec 6>&1
+fi
+exec 5>./config.log
+
+echo "\
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by configure version 2.14a, executed with
+ > $0 $ac_configure_args
+" 1>&5
+
+# NLS nuisances.
+# Only set these to C if already set. These must not be set unconditionally
+# because not all systems understand e.g. LANG=C (notably SCO).
+# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'!
+# Non-C LC_CTYPE values break the ctype check.
+if test "${LANG+set}" = set; then LANG=C; export LANG; fi
+if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
+if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi
+if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# A filename unique to this package, relative to the directory that
+# configure is in, which we can look for to find out if srcdir is correct.
+ac_unique_file=
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_prog=$0
+ ac_confdir=`echo "$ac_prog" | sed 's%/[^/][^/]*$%%'`
+ test "x$ac_confdir" = "x$ac_prog" && ac_confdir=.
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "configure: error: cannot find sources in $ac_confdir or .." 1>&2; exit 1; }
+ else
+ { echo "configure: error: cannot find sources in $srcdir" 1>&2; exit 1; }
+ fi
+fi
+srcdir=`echo "$srcdir" | sed 's%\([^/]\)/*$%\1%'`
+
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ echo "loading site script $ac_site_file"
+ . "$ac_site_file"
+ fi
+done
+
+if test -r "$cache_file"; then
+ echo "loading cache $cache_file"
+ test -f "$cache_file" && . $cache_file
+else
+ echo "creating cache $cache_file"
+ >$cache_file
+fi
+
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ac_exeext=
+ac_objext=o
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#include <sys/types.h>
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#else
+# if HAVE_STRINGS_H
+# include <strings.h>
+# endif
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
+ # Stardent Vistra SVR4 grep lacks -e, says Kaveh R. Ghazi.
+ if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
+ ECHO_N= ECHO_C='
+' ECHO_T=' '
+ else
+ ECHO_N=-n ECHO_C= ECHO_T=
+ fi
+else
+ ECHO_N= ECHO_C='\c' ECHO_T=
+fi
+
+ac_aux_dir=
+for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/install-sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f $ac_dir/install.sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f $ac_dir/shtool; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ { echo "configure: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; }
+fi
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"
+ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.
+
+echo $ECHO_N "checking host system type... $ECHO_C" 1>&6
+echo "configure:636: checking host system type" 1>&5
+if test "x$ac_cv_host" = "x" || (test "x$host" != "xNONE" && test "x$host" != "x$ac_cv_host_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_host_alias=$host
+ case "$ac_cv_host_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ if ac_cv_host_alias=`$ac_config_guess`; then :
+ else { echo "configure: error: cannot guess host type; you must specify one" 1>&2; exit 1; }
+ fi ;; *) ac_cv_host_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_host=`$ac_config_sub $ac_cv_host_alias` || exit 1
+ ac_cv_host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_host" 1>&6
+
+host=$ac_cv_host
+host_alias=$ac_cv_host_alias
+host_cpu=$ac_cv_host_cpu
+host_vendor=$ac_cv_host_vendor
+host_os=$ac_cv_host_os
+
+echo $ECHO_N "checking target system type... $ECHO_C" 1>&6
+echo "configure:672: checking target system type" 1>&5
+if test "x$ac_cv_target" = "x" || (test "x$target" != "xNONE" && test "x$target" != "x$ac_cv_target_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_target_alias=$target
+ case "$ac_cv_target_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ ac_cv_target_alias=$host_alias ;;
+ *) ac_cv_target_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_target=`$ac_config_sub $ac_cv_target_alias` || exit 1
+ ac_cv_target_cpu=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_target_vendor=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_target_os=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_target" 1>&6
+
+target=$ac_cv_target
+target_alias=$ac_cv_target_alias
+target_cpu=$ac_cv_target_cpu
+target_vendor=$ac_cv_target_vendor
+target_os=$ac_cv_target_os
+
+echo $ECHO_N "checking build system type... $ECHO_C" 1>&6
+echo "configure:707: checking build system type" 1>&5
+if test "x$ac_cv_build" = "x" || (test "x$build" != "xNONE" && test "x$build" != "x$ac_cv_build_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_build_alias=$build
+ case "$ac_cv_build_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ ac_cv_build_alias=$host_alias ;;
+ *) ac_cv_build_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || exit 1
+ ac_cv_build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_build" 1>&6
+
+build=$ac_cv_build
+build_alias=$ac_cv_build_alias
+build_cpu=$ac_cv_build_cpu
+build_vendor=$ac_cv_build_vendor
+build_os=$ac_cv_build_os
+
+# Do some error checking and defaulting for the host and target type.
+# The inputs are:
+# configure --host=HOST --target=TARGET --build=BUILD NONOPT
+#
+# The rules are:
+# 1. You are not allowed to specify --host, --target, and nonopt at the
+# same time.
+# 2. Host defaults to nonopt.
+# 3. If nonopt is not specified, then host defaults to the current host,
+# as determined by config.guess.
+# 4. Target and build default to nonopt.
+# 5. If nonopt is not specified, then target and build default to host.
+
+# The aliases save the names the user supplied, while $host etc.
+# will get canonicalized.
+case $host---$target---$nonopt in
+NONE---*---* | *---NONE---* | *---*---NONE) ;;
+*) { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } ;;
+esac
+
+test "$host_alias" != "$target_alias" &&
+ test "$program_prefix$program_suffix$program_transform_name" = \
+ NONENONEs,x,x, &&
+ program_prefix=${target_alias}-
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# ./install, which can be erroneously created by make from ./install.sh.
+echo $ECHO_N "checking for a BSD compatible install... $ECHO_C" 1>&6
+echo "configure:778: checking for a BSD compatible install" 1>&5
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":"
+ for ac_dir in $PATH; do
+ # Account for people who put trailing slashes in PATH elements.
+ case "$ac_dir/" in
+ /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ if test -f $ac_dir/$ac_prog; then
+ if test $ac_prog = install &&
+ grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ ac_cv_path_install="$ac_dir/$ac_prog -c"
+ break 2
+ fi
+ fi
+ done
+ ;;
+ esac
+ done
+ IFS="$ac_save_IFS"
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL="$ac_cv_path_install"
+ else
+ # As a last resort, use the slow shell script. We don't cache a
+ # path for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the path is relative.
+ INSTALL="$ac_install_sh"
+ fi
+fi
+echo "$ECHO_T""$INSTALL" 1>&6
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+echo $ECHO_N "checking whether build environment is sane... $ECHO_C" 1>&6
+echo "configure:835: checking whether build environment is sane" 1>&5
+# Just in case
+sleep 1
+echo timestamp > conftestfile
+# Do `set' in a subshell so we don't clobber the current shell's
+# arguments. Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+ set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null`
+ if test "$*" = "X"; then
+ # -L didn't work.
+ set X `ls -t $srcdir/configure conftestfile`
+ fi
+ if test "$*" != "X $srcdir/configure conftestfile" \
+ && test "$*" != "X conftestfile $srcdir/configure"; then
+
+ # If neither matched, then we have a broken ls. This can happen
+ # if, for instance, CONFIG_SHELL is bash and it inherits a
+ # broken ls alias from the environment. This has actually
+ # happened. Such a system could not be considered "sane".
+ { echo "configure: error: ls -t appears to fail. Make sure there is not a broken
+alias in your environment" 1>&2; exit 1; }
+ fi
+
+ test "$2" = conftestfile
+ )
+then
+ # Ok.
+ :
+else
+ { echo "configure: error: newly created file is older than distributed files!
+Check your system clock" 1>&2; exit 1; }
+fi
+rm -f conftest*
+echo "$ECHO_T""yes" 1>&6
+if test "$program_transform_name" = s,x,x,; then
+ program_transform_name=
+else
+ # Double any \ or $. echo might interpret backslashes.
+ cat <<\EOF >conftestsed
+s,\\,\\\\,g; s,\$,$$,g
+EOF
+ program_transform_name=`echo $program_transform_name | sed -f conftestsed`
+ rm -f conftestsed
+fi
+test "$program_prefix" != NONE &&
+ program_transform_name="s,^,${program_prefix},;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+ program_transform_name="s,\$\$,${program_suffix},;$program_transform_name"
+
+# sed with no file args requires a program.
+test "$program_transform_name" = "" && program_transform_name="s,x,x,"
+
+test x"${MISSING+set}" = xset || \
+ MISSING="\${SHELL} `CDPATH=: && cd $ac_aux_dir && pwd`/missing"
+if eval "$MISSING --run :"; then
+ am_missing_run="$MISSING --run "
+else
+ am_missing_run=
+ am_backtick='`'
+ echo "configure: warning: ${am_backtick}missing' script is too old or missing" 1>&2
+fi
+
+for ac_prog in mawk gawk nawk awk
+do
+# Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:906: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_AWK+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$AWK"; then
+ ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_AWK="$ac_prog"
+ break
+ done
+fi
+fi
+AWK="$ac_cv_prog_AWK"
+if test -n "$AWK"; then
+ echo "$ECHO_T""$AWK" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+test -n "$AWK" && break
+done
+
+echo $ECHO_N "checking whether ${MAKE-make} sets \${MAKE}... $ECHO_C" 1>&6
+echo "configure:939: checking whether ${MAKE-make} sets \${MAKE}" 1>&5
+set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'`
+if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftestmake <<\EOF
+all:
+ @echo 'ac_maketemp="${MAKE}"'
+EOF
+# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=`
+if test -n "$ac_maketemp"; then
+ eval ac_cv_prog_make_${ac_make}_set=yes
+else
+ eval ac_cv_prog_make_${ac_make}_set=no
+fi
+rm -f conftestmake
+fi
+if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then
+ echo "$ECHO_T""yes" 1>&6
+ SET_MAKE=
+else
+ echo "$ECHO_T""no" 1>&6
+ SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+# Check whether --enable-dependency-tracking or --disable-dependency-tracking was given.
+if test "${enable_dependency_tracking+set}" = set; then
+ enableval="$enable_dependency_tracking"
+
+fi
+if test "x$enable_dependency_tracking" = xno; then
+ AMDEP="#"
+else
+ am_depcomp="$ac_aux_dir/depcomp"
+ if test ! -f "$am_depcomp"; then
+ AMDEP="#"
+ else
+ AMDEP=
+ fi
+fi
+
+if test -z "$AMDEP"; then
+ AMDEPBACKSLASH='\'
+else
+ AMDEPBACKSLASH=
+fi
+
+if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then
+ DEPDIR=.deps
+else
+ DEPDIR=_deps
+fi
+
+PACKAGE=gmp
+
+VERSION=3.1.1
+
+if test "`CDPATH=: && cd $srcdir && pwd`" != "`pwd`" &&
+ test -f $srcdir/config.status; then
+ { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; }
+fi
+cat >>confdefs.h <<EOF
+#define PACKAGE "$PACKAGE"
+EOF
+
+cat >>confdefs.h <<EOF
+#define VERSION "$VERSION"
+EOF
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal"}
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake"}
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+AMTAR=${AMTAR-"${am_missing_run}tar"}
+
+if test -z "$install_sh"; then
+ install_sh="$ac_aux_dir/install-sh"
+ test -f "$install_sh" || install_sh="$ac_aux_dir/install.sh"
+ test -f "$install_sh" || install_sh="${am_missing_run}${ac_auxdir}/install-sh"
+ install_sh="`echo $install_sh | sed -e 's/\${SHELL}//'`"
+fi
+
+echo $ECHO_N "checking whether to enable maintainer-specific portions of Makefiles... $ECHO_C" 1>&6
+echo "configure:1029: checking whether to enable maintainer-specific portions of Makefiles" 1>&5
+ # Check whether --enable-maintainer-mode or --disable-maintainer-mode was given.
+if test "${enable_maintainer_mode+set}" = set; then
+ enableval="$enable_maintainer_mode"
+ USE_MAINTAINER_MODE=$enableval
+else
+ USE_MAINTAINER_MODE=no
+fi
+ echo "$ECHO_T""$USE_MAINTAINER_MODE" 1>&6
+
+if test $USE_MAINTAINER_MODE = yes; then
+ MAINTAINER_MODE_TRUE=
+ MAINTAINER_MODE_FALSE='#'
+else
+ MAINTAINER_MODE_TRUE='#'
+ MAINTAINER_MODE_FALSE=
+fi
+ MAINT=$MAINTAINER_MODE_TRUE
+
+gmp_configm4="config.m4"
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
+test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
+test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p
+
+# Check whether --enable-assert or --disable-assert was given.
+if test "${enable_assert+set}" = set; then
+ enableval="$enable_assert"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-assert, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_assert=no
+fi
+
+if test "$enable_assert" = "yes"; then
+ cat >>confdefs.h <<\EOF
+#define WANT_ASSERT 1
+EOF
+
+fi
+
+# Check whether --enable-alloca or --disable-alloca was given.
+if test "${enable_alloca+set}" = set; then
+ enableval="$enable_alloca"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-alloca, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_alloca=yes
+fi
+
+if test "$enable_alloca" = "no"; then
+ cat >>confdefs.h <<\EOF
+#define USE_STACK_ALLOC 1
+EOF
+
+fi
+
+# Check whether --enable-fft or --disable-fft was given.
+if test "${enable_fft+set}" = set; then
+ enableval="$enable_fft"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-fft, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_fft=no
+fi
+
+if test "$enable_fft" = "yes"; then
+ cat >>confdefs.h <<\EOF
+#define WANT_FFT 1
+EOF
+
+fi
+
+# Check whether --enable-mpbsd or --disable-mpbsd was given.
+if test "${enable_mpbsd+set}" = set; then
+ enableval="$enable_mpbsd"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-mpbsd, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_mpbsd=no
+fi
+
+if test "$enable_mpbsd" = "yes"; then
+ WANT_MPBSD_TRUE=
+ WANT_MPBSD_FALSE='#'
+else
+ WANT_MPBSD_TRUE='#'
+ WANT_MPBSD_FALSE=
+fi
+
+# Check whether --enable-mpfr or --disable-mpfr was given.
+if test "${enable_mpfr+set}" = set; then
+ enableval="$enable_mpfr"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-mpfr, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_mpfr=no
+fi
+
+if test "$enable_mpfr" = "yes"; then
+ WANT_MPFR_TRUE=
+ WANT_MPFR_FALSE='#'
+else
+ WANT_MPFR_TRUE='#'
+ WANT_MPFR_FALSE=
+fi
+
+os_64bit="no"
+cclist="gcc cc" # FIXME: Prefer c89 to cc.
+gmp_cflags_gcc="-g -O2"
+gmp_cflags64_gcc="-g -O2"
+gmp_cflags_cc="-g"
+gmp_cflags64_cc="-g"
+
+case "$target" in
+ # Alpha
+ alpha*-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ gmp_cflags_cc="$gmp_cflags_cc -O"
+ ;;
+ alpha*-*-osf*)
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in # compilers don't seem to understand `ev67' and such.
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ # FIXME: We shouldn't fail fatally if none of these work, but that's
+ # how xoptcflags work and we don't have any other mechanism right now.
+ # Why do we need this here and not for alpha*-*-* below?
+ gmp_xoptcflags_gcc="-Wa,-arch,${flavour} -Wa,-m${flavour}"
+ gmp_optcflags_cc="-arch $flavour -tune $flavour"
+ fi
+ ;;
+ alpha*-*-*)
+ cclist="gcc"
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ fi
+ ;;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ # Don't inherit default gmp_cflags_cc value; it comes with -g which
+ # disables all optimization on Cray vector systems
+ gmp_cflags_cc="-O"
+ ;;
+
+ # AMD and Intel x86 configurations
+ i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*)
+ # Rumour has it -O2 used to give worse register allocation than just -O.
+ gmp_cflags_gcc="-g -O -fomit-frame-pointer"
+
+ case "${target}" in
+ i386*-*-*) gmp_optcflags_gcc="-mcpu=i386 -march=i386";;
+ i486*-*-*) gmp_optcflags_gcc="-mcpu=i486 -march=i486";;
+ i586*-*-* | pentium-*-* | pentiummmx-*-*)
+ gmp_optcflags_gcc="-mcpu=pentium -march=pentium";;
+
+ # -march=pentiumpro not used because mpz/powm.c (swox cvs rev 1.4)
+ # tickles a bug in gcc 2.95.2 (believed fixed in 2.96).
+ i686*-*-* | pentiumpro-*-* | pentium[23]-*-*)
+ gmp_optcflags_gcc="-mcpu=pentiumpro";;
+
+ k6*-*-*) gmp_optcflags_gcc="-mcpu=k6 -march=k6";;
+
+ # Athlon instruction costs are close to p6: 3 cycle load latency, 4-6
+ # cycle mul, 40 cycle div, pairable adc, ...
+ # FIXME: Change this when gcc gets something specific for Athlon.
+ # -march=pentiumpro not used, per i686 above.
+ athlon-*-*) gmp_optcflags_gcc="-mcpu=pentiumpro";;
+ esac
+ ;;
+
+ # Sparc
+ ultrasparc*-*-solaris2.[7-9] | sparcv9-*-solaris2.[7-9])
+ os_64bit=yes
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ gmp_cflags64_cc="-xtarget=native -xarch=v9 -xO4"
+ ;;
+ sparc64-*-linux*)
+ # Need to think more about the options passed here. This isn't good for
+ # some sparc64 linux distros, since we end up not optimizing when all the
+ # options below fail.
+ os_64bit=yes
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_gcc="$gmp_cflags_gcc -m32"
+ gmp_xoptflags_gcc="-mcpu=ultrasparc -mvis"
+ ;;
+ ultrasparc*-*-* | sparcv9-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-solaris2.* | microsparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-* | microsparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92"
+ gmp_cflags_cc="-O2" # FIXME: Flag for v8?
+ ;;
+ supersparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4 -DSUPERSPARC"
+ ;;
+ supersparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92 -DSUPERSPARC"
+ gmp_cflags_cc="-O2 -DSUPERSPARC" # FIXME: Flag for v8?
+ ;;
+ *sparc*-*-*)
+ cclist="gcc acc cc"
+ gmp_cflags_acc="-g -O2"
+ gmp_cflags_cc="-g -O2"
+ ;;
+
+ # POWER/PowerPC
+ powerpc64-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -maix64 -mpowerpc64"
+ gmp_cflags_xlc="-g -O2 -q64 -qtune=pwr3"
+ ;;
+ powerpc*-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=ppc -O2"
+ ;;
+ power-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpower"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=pwr -O2"
+ ;;
+ powerpc64*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc64"
+ cat >>confdefs.h <<\EOF
+#define _LONG_LONG_LIMB 1
+EOF
+ ;;
+ powerpc-apple-darwin* | powerpc-apple-macosx*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc -traditional-cpp"
+ ;;
+ powerpc*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ ;;
+
+ # MIPS
+ mips-sgi-irix6.*)
+ os_64bit=yes
+ gmp_cflags64_gcc="-g -O2 -mabi=n32"
+ gmp_cflags64_cc="$gmp_cflags64_cc -O2 -n32"
+ ;;
+
+ # Motorola 68k family
+ m88110*-*-*)
+ gmp_cflags_gcc="-g -O -m88110" ;;
+ m68*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # HP
+ hppa1.0*-*-*)
+ cclist="gcc c89 cc"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+ hppa2.0w*-*-*)
+ cclist="c89 cc"
+ gmp_cflags_c89="+DD64 +O3"
+ gmp_cflags_cc="+DD64 +O3"
+ ;;
+ hppa2.0*-*-*)
+ os_64bit=yes
+ cclist="gcc c89 cc"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -mWHAT -D_LONG_LONG_LIMB"
+ # +O2 to cc triggers bug in mpz/powm.c (1.4)
+ gmp_cflags64_c89="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags64_cc="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+
+ # VAX
+ vax*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # Fujitsu
+ f30[01]-fujitsu-sysv*)
+ cclist="gcc vcc"
+ gmp_cflags_vcc="-g" # FIXME: flags for vcc?
+ ;;
+esac
+
+case "${target}" in
+ *-*-mingw32) gmp_cflags_gcc="$gmp_cflags_gcc -mno-cygwin";;
+esac
+
+echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" 1>&6
+echo "configure:1352: checking for BSD-compatible nm" 1>&5
+if test "${ac_cv_path_NM+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$NM"; then
+ # Let the user override the test.
+ ac_cv_path_NM="$NM"
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext ; then
+ # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+ # nm: unknown option "B" ignored
+ if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -B"
+ break
+ elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -p"
+ break
+ else
+ ac_cv_path_NM=${ac_cv_path_NM="$ac_dir/nm"} # keep the first match, but
+ continue # so that we can try to find one that supports BSD flags
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+ test -z "$ac_cv_path_NM" && ac_cv_path_NM=nm
+fi
+fi
+
+NM="$ac_cv_path_NM"
+echo "$ECHO_T""$NM" 1>&6
+ # nm on 64-bit AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ NM="$NM -X 64"
+ ;;
+esac
+
+# Save CFLAGS given on command line.
+gmp_user_CFLAGS="$CFLAGS"
+
+if test -z "$CC"; then
+ # Find compiler.
+
+if test $host != $build; then
+ ac_tool_prefix=${host_alias}-
+else
+ ac_tool_prefix=
+fi
+
+gmp_cc_list="$cclist"
+gmp_req_64bit_cc="$os_64bit"
+
+CC32=
+CC64=
+for c in $gmp_cc_list; do
+ # Avoid cache hits.
+ unset CC
+ unset ac_cv_prog_CC
+
+# Extract the first word of "${ac_tool_prefix}$c", so it can be a program name with args.
+set dummy ${ac_tool_prefix}$c; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:1418: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_CC="${ac_tool_prefix}$c"
+ break
+ done
+fi
+fi
+CC="$ac_cv_prog_CC"
+if test -n "$CC"; then
+ echo "$ECHO_T""$CC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$ac_cv_prog_CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "$c", so it can be a program name with args.
+set dummy $c; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:1452: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_CC="$c"
+ break
+ done
+ test -z "$ac_cv_prog_CC" && ac_cv_prog_CC="$c"
+fi
+fi
+CC="$ac_cv_prog_CC"
+if test -n "$CC"; then
+ echo "$ECHO_T""$CC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ CC="$c"
+ fi
+fi
+
+ if test -n "$CC"; then
+ eval c_flags=\$gmp_cflags_$c
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+ CC="$CC"
+CFLAGS="$c_flags"
+echo $ECHO_N "checking if the C compiler ($CC) works with flags $CFLAGS... $ECHO_C" 1>&6
+echo "configure:1498: checking if the C compiler ($CC) works with flags $CFLAGS" 1>&5
+
+# Simple test for all targets.
+cat >conftest.$ac_ext <<EOF
+
+#line 1503 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1508: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ tmp_works=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ tmp_cross=no
+ else
+ tmp_cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ tmp_works=no
+fi
+rm -fr conftest*
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ cat >conftest.$ac_ext <<EOF
+#line 1528 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+} void *g(); void *f() { return g(); } int bar(){
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:1539: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ tmp_works=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ tmp_works=no
+fi
+rm -f conftest*
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ gmp_prog_cc_works=yes
+else
+ gmp_prog_cc_works=no
+fi
+
+echo "$ECHO_T""$tmp_works" 1>&6
+
+ if test "$gmp_prog_cc_works" != "yes"; then
+ continue
+ fi
+
+ # Save first working compiler, whether 32- or 64-bit capable.
+ if test -z "$CC32"; then
+ CC32="$CC"
+ fi
+ if test "$gmp_req_64bit_cc" = "yes"; then
+ eval c_flags=\$gmp_cflags64_$c
+
+ # Verify that the compiler works in 64-bit mode as well.
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link.
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+ CC="$c"
+CFLAGS="$c_flags"
+echo $ECHO_N "checking if the C compiler ($CC) works with flags $CFLAGS... $ECHO_C" 1>&6
+echo "configure:1583: checking if the C compiler ($CC) works with flags $CFLAGS" 1>&5
+
+# Simple test for all targets.
+cat >conftest.$ac_ext <<EOF
+
+#line 1588 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1593: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ tmp_works=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ tmp_cross=no
+ else
+ tmp_cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ tmp_works=no
+fi
+rm -fr conftest*
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ cat >conftest.$ac_ext <<EOF
+#line 1613 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+} void *g(); void *f() { return g(); } int bar(){
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:1624: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ tmp_works=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ tmp_works=no
+fi
+rm -f conftest*
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ gmp_prog_cc_works=yes
+else
+ gmp_prog_cc_works=no
+fi
+
+echo "$ECHO_T""$tmp_works" 1>&6
+
+ if test "$gmp_prog_cc_works" = "yes"; then
+
+ gmp_tmp_CC_save="$CC"
+ CC="$c"
+ echo $ECHO_N "checking whether the C compiler ($CC) is 64-bit capable... $ECHO_C" 1>&6
+echo "configure:1651: checking whether the C compiler ($CC) is 64-bit capable" 1>&5
+ if test -z "$NM"; then
+ echo; echo "configure: GMP_CHECK_CC_64BIT: fatal: need nm"
+ exit 1
+ fi
+ gmp_tmp_CFLAGS_save="$CFLAGS"
+ CFLAGS="$c_flags"
+
+ case "$target" in
+ hppa2.0*-*-*)
+ # FIXME: If gcc is installed under another name than "gcc", we will
+ # test the wrong thing.
+ if test "$CC" != "gcc"; then
+ echo >conftest.c
+ gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"`
+ rm conftest*
+ gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'`
+ gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+ gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+ gmp_cv_cc_64bit=no
+ test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \
+ && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \
+ && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \
+ && gmp_cv_cc_64bit=yes
+ else # gcc
+ # FIXME: Compile a minimal file and determine if the resulting object
+ # file is an ELF file. If so, gcc can produce 64-bit code.
+ # Do we have file(1) for target?
+ gmp_cv_cc_64bit=no
+ fi
+ ;;
+ mips-sgi-irix6.*)
+ # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit
+ # arithmetic but not 64-bit pointers, so the general test for sizeof
+ # (void *) is not valid.
+ # Simply try to compile an empty main. If that succeeds return
+ # true.
+ cat >conftest.$ac_ext <<EOF
+#line 1689 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:1700: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ gmp_cv_cc_64bit=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ gmp_cv_cc_64bit=no
+fi
+rm -f conftest*
+ ;;
+ *-*-*)
+ # Allocate an array of size sizeof (void *) and use nm to determine its
+ # size. We depend on the first declared variable being put at address 0.
+ cat >conftest.c <<EOF
+char arr[sizeof (void *)]={0};
+char post=0;
+EOF
+ gmp_compile="$CC $CFLAGS -c conftest.c 1>&5"
+ if { (eval echo configure:1719: \"$gmp_compile\") 1>&5; (eval $gmp_compile) 2>&5; }; then
+ gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ if test "$gmp_tmp_val" = "8"; then
+ gmp_cv_cc_64bit=yes
+ else
+ gmp_cv_cc_64bit=no
+ fi
+ else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ gmp_cv_cc_64bit=no
+ fi
+ rm -f conftest*
+ ;;
+ esac
+
+ CC="$gmp_tmp_CC_save"
+ CFLAGS="$gmp_tmp_CFLAGS_save"
+ echo "$ECHO_T""$gmp_cv_cc_64bit" 1>&6
+
+ if test "$gmp_cv_cc_64bit" = "yes"; then
+ test -z "$CC64" && CC64="$c"
+ test -z "$CFLAGS64" && CFLAGS64="$c_flags"
+ # We have CC64 so we're done.
+ break
+ fi
+ fi
+ else
+ # We have CC32, and we don't need a 64-bit compiler so we're done.
+ break
+ fi
+ fi
+done
+CC="$CC32"
+
+ # If 64-bit OS and we have a 64-bit compiler, use it.
+ if test -n "$os_64bit" && test -n "$CC64"; then
+ CC=$CC64
+ CFLAGS=$CFLAGS64
+ else
+ eval CFLAGS=\$gmp_cflags_$CC
+ fi
+
+ # Try compiler flags that may work with only some compiler versions.
+ # gmp_optcflags: All or nothing.
+ eval optcflags=\$gmp_optcflags_$CC
+ if test -n "$optcflags"; then
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $optcflags"
+ echo $ECHO_N "checking whether $CC accepts $optcflags... $ECHO_C" 1>&6
+echo "configure:1770: checking whether $CC accepts $optcflags" 1>&5
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ cat >conftest.$ac_ext <<EOF
+
+#line 1780 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1785: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ optok=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ cross=no
+ else
+ cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ optok=no
+fi
+rm -fr conftest*
+ if test "$optok" = "yes"; then
+ echo "$ECHO_T""yes" 1>&6
+ else
+ echo "$ECHO_T""no" 1>&6
+ CFLAGS="$CFLAGS_save"
+ fi
+ fi
+ # gmp_xoptcflags: First is best, one has to work.
+ eval xoptcflags=\$gmp_xoptcflags_$CC
+ if test -n "$xoptcflags"; then
+ gmp_found="no"
+ for xopt in $xoptcflags; do
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $xopt"
+ echo $ECHO_N "checking whether $CC accepts $xopt... $ECHO_C" 1>&6
+echo "configure:1814: checking whether $CC accepts $xopt" 1>&5
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ cat >conftest.$ac_ext <<EOF
+
+#line 1824 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1829: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ optok=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ cross=no
+ else
+ cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ optok=no
+fi
+rm -fr conftest*
+ if test "$optok" = "yes"; then
+ echo "$ECHO_T""yes" 1>&6
+ gmp_found="yes"
+ break
+ else
+ echo "$ECHO_T""no" 1>&6
+ CFLAGS="$CFLAGS_save"
+ fi
+ done
+ if test "$gmp_found" = "no"; then
+ echo "$0: fatal: need a compiler that understands one of $xoptcflags"
+ exit 1
+ fi
+ fi
+fi
+
+# Restore CFLAGS given on command line.
+# FIXME: We've run through quite some unnecessary code looking for a
+# nice compiler and working flags for it, just to spoil that with user
+# supplied flags.
+test -n "$gmp_user_CFLAGS" && CFLAGS="$gmp_user_CFLAGS"
+
+# Select chosen compiler.
+
+echo $ECHO_N "checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) works... $ECHO_C" 1>&6
+echo "configure:1868: checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) works" 1>&5
+
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+cat >conftest.$ac_ext <<EOF
+
+#line 1879 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1884: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ ac_cv_prog_cc_works=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ ac_cv_prog_cc_cross=no
+ else
+ ac_cv_prog_cc_cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ ac_cv_prog_cc_works=no
+fi
+rm -fr conftest*
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+echo "$ECHO_T""$ac_cv_prog_cc_works" 1>&6
+if test $ac_cv_prog_cc_works = no; then
+ { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 77; }
+fi
+echo $ECHO_N "checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) is a cross-compiler... $ECHO_C" 1>&6
+echo "configure:1910: checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) is a cross-compiler" 1>&5
+echo "$ECHO_T""$ac_cv_prog_cc_cross" 1>&6
+cross_compiling=$ac_cv_prog_cc_cross
+
+echo $ECHO_N "checking whether we are using GNU C... $ECHO_C" 1>&6
+echo "configure:1915: checking whether we are using GNU C" 1>&5
+if test "${ac_cv_prog_gcc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.c <<EOF
+#ifdef __GNUC__
+ yes;
+#endif
+EOF
+if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:1924: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
+ ac_cv_prog_gcc=yes
+else
+ ac_cv_prog_gcc=no
+fi
+fi
+echo "$ECHO_T""$ac_cv_prog_gcc" 1>&6
+
+if test "$ac_cv_prog_gcc" = "yes"; then
+ GCC=yes
+else
+ GCC=
+fi
+
+# Set CFLAGS if not already set.
+if test -z "$CFLAGS"; then
+ CFLAGS="-g"
+ if test "$GCC" = "yes"; then
+ CFLAGS="$CFLAGS -O2"
+ fi
+fi
+
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ ac_cv_prog_CC="$CC"
+fi
+
+# How to assemble.
+CCAS="$CC -c"
+
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" 1>&6
+echo "configure:1956: checking how to run the C preprocessor" 1>&5
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ # This must be in double quotes, not single quotes, because CPP may get
+ # substituted into the Makefile and "${CC-cc}" will confuse make.
+ CPP="${CC-cc} -E"
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp.
+
+cat >conftest.$ac_ext <<EOF
+#line 1972 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:1978: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ CPP="${CC-cc} -E -traditional-cpp"
+
+cat >conftest.$ac_ext <<EOF
+#line 1990 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:1996: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ CPP="${CC-cc} -nologo -E"
+
+cat >conftest.$ac_ext <<EOF
+#line 2008 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:2014: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ CPP=/lib/cpp
+fi
+rm -f conftest*
+fi
+rm -f conftest*
+fi
+rm -f conftest*
+ ac_cv_prog_CPP="$CPP"
+fi
+ CPP="$ac_cv_prog_CPP"
+else
+ ac_cv_prog_CPP="$CPP"
+fi
+echo "$ECHO_T""$CPP" 1>&6
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# ./install, which can be erroneously created by make from ./install.sh.
+echo $ECHO_N "checking for a BSD compatible install... $ECHO_C" 1>&6
+echo "configure:2050: checking for a BSD compatible install" 1>&5
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":"
+ for ac_dir in $PATH; do
+ # Account for people who put trailing slashes in PATH elements.
+ case "$ac_dir/" in
+ /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ if test -f $ac_dir/$ac_prog; then
+ if test $ac_prog = install &&
+ grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ ac_cv_path_install="$ac_dir/$ac_prog -c"
+ break 2
+ fi
+ fi
+ done
+ ;;
+ esac
+ done
+ IFS="$ac_save_IFS"
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL="$ac_cv_path_install"
+ else
+ # As a last resort, use the slow shell script. We don't cache a
+ # path for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the path is relative.
+ INSTALL="$ac_install_sh"
+ fi
+fi
+echo "$ECHO_T""$INSTALL" 1>&6
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+echo $ECHO_N "checking whether ln -s works... $ECHO_C" 1>&6
+echo "configure:2107: checking whether ln -s works" 1>&5
+if test "${ac_cv_prog_LN_S+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ rm -f conftestdata
+if ln -s X conftestdata 2>/dev/null
+then
+ rm -f conftestdata
+ ac_cv_prog_LN_S="ln -s"
+else
+ ac_cv_prog_LN_S=ln
+fi
+fi
+LN_S="$ac_cv_prog_LN_S"
+if test "$ac_cv_prog_LN_S" = "ln -s"; then
+ echo "$ECHO_T""yes" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+echo $ECHO_N "checking for suitable m4... $ECHO_C" 1>&6
+echo "configure:2128: checking for suitable m4" 1>&5
+if test "${gmp_cv_prog_m4+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$M4"; then
+ gmp_cv_prog_m4="$M4"
+else
+ cat >conftest.m4 <<\EOF
+define(dollarhash,``$#'')dnl
+ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $# not supported (SunOS /usr/bin/m4)
+'')dnl
+ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')dnl
+ifelse(t1`'t2,YY,`good
+')dnl
+EOF
+ echo "trying m4" 1>&5
+ gmp_tmp_val="`(m4 conftest.m4) 2>&5`"
+ echo "$gmp_tmp_val" 1>&5
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="m4"
+ else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ ac_dummy="$PATH:/usr/5bin"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ echo "trying $ac_dir/m4" 1>&5
+ gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&5`"
+ echo "$gmp_tmp_val" 1>&5
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="$ac_dir/m4"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ if test -z "$gmp_cv_prog_m4"; then
+ { echo "configure: error: No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." 1>&2; exit 1; }
+ fi
+ fi
+ rm -f conftest.m4
+fi
+fi
+echo "$ECHO_T""$gmp_cv_prog_m4" 1>&6
+M4="$gmp_cv_prog_m4"
+
+# Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:2178: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_AR+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$AR"; then
+ ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_AR="ar"
+ break
+ done
+fi
+fi
+AR="$ac_cv_prog_AR"
+if test -n "$AR"; then
+ echo "$ECHO_T""$AR" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+# ar on AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ AR="$AR -X 64"
+ ;;
+esac
+
+if test "$gmp_no_asm_syntax_testing" != "yes"; then
+ echo $ECHO_N "checking how to switch to text section... $ECHO_C" 1>&6
+echo "configure:2216: checking how to switch to text section" 1>&5
+if test "${gmp_cv_check_asm_text+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-aix*)
+
+ gmp_cv_check_asm_text=".csect .text[PR]"
+
+ ;;
+ *-*-hpux*) gmp_cv_check_asm_text=".code" ;;
+ *) gmp_cv_check_asm_text=".text" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_text" 1>&6
+echo "define(<TEXT>, <$gmp_cv_check_asm_text>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking how to switch to data section... $ECHO_C" 1>&6
+echo "configure:2235: checking how to switch to data section" 1>&5
+if test "${gmp_cv_check_asm_data+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-aix*)
+
+ gmp_cv_check_asm_data=".csect .data[RW]"
+
+ ;;
+ *) gmp_cv_check_asm_data=".data" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_data" 1>&6
+echo "define(<DATA>, <$gmp_cv_check_asm_data>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking how to export a symbol... $ECHO_C" 1>&6
+echo "configure:2253: checking how to export a symbol" 1>&5
+if test "${gmp_cv_check_asm_globl+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_globl=".export" ;;
+ *) gmp_cv_check_asm_globl=".globl" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_globl" 1>&6
+echo "define(<GLOBL>, <$gmp_cv_check_asm_globl>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking what assembly label suffix to use... $ECHO_C" 1>&6
+echo "configure:2267: checking what assembly label suffix to use" 1>&5
+if test "${gmp_cv_check_asm_label_suffix+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_label_suffix="" ;;
+ *) gmp_cv_check_asm_label_suffix=":" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_label_suffix" 1>&6
+echo "define(<LABEL_SUFFIX>, <\$1$gmp_cv_check_asm_label_suffix>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking how the .type assembly directive should be used... $ECHO_C" 1>&6
+echo "configure:2281: checking how the .type assembly directive should be used" 1>&5
+if test "${gmp_cv_check_asm_type+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+for gmp_tmp_prefix in @ \# %; do
+ echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+ if { (eval echo configure:2288: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_type=".type \$1,${gmp_tmp_prefix}\$2"
+ break
+ fi
+done
+if test -z "$gmp_cv_check_asm_type"; then
+ gmp_cv_check_asm_type="dnl"
+fi
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_type" 1>&6
+echo "define(<TYPE>, <$gmp_cv_check_asm_type>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking if the .size assembly directive works... $ECHO_C" 1>&6
+echo "configure:2302: checking if the .size assembly directive works" 1>&5
+if test "${gmp_cv_check_asm_size+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+echo ' .size sym,1' > conftest.s
+if { (eval echo configure:2308: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_size=".size \$1,\$2"
+else
+ gmp_cv_check_asm_size="dnl"
+fi
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_size" 1>&6
+echo "define(<SIZE>, <$gmp_cv_check_asm_size>)" >> $gmp_tmpconfigm4
+
+echo $ECHO_N "checking what prefix to use for a local label... $ECHO_C" 1>&6
+echo "configure:2319: checking what prefix to use for a local label" 1>&5
+if test "${gmp_cv_check_asm_lsym_prefix+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$NM"; then
+ echo; echo "GMP_CHECK_ASM_LSYM_PREFIX: fatal: need nm"
+ exit 1
+fi
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+gmp_cv_check_asm_lsym_prefix="L"
+for gmp_tmp_pre in L .L $ L$; do
+ cat > conftest.s <<EOF
+dummy${gmp_cv_check_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if { (eval echo configure:2335: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ $NM conftest.o >/dev/null 2>&1
+ gmp_rc=$?
+ if test "$gmp_rc" != "0"; then
+ echo "configure: $NM failure, using default"
+ break
+ fi
+ if $NM conftest.o | grep gurkmacka >/dev/null; then true; else
+ gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre"
+ break
+ fi
+ else
+ echo "configure: failed program was:" >&5
+ cat conftest.s >&5
+ # Use default.
+ fi
+done
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_lsym_prefix" 1>&6
+echo "define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)" >> $gmp_tmpconfigm4
+
+echo $ECHO_N "checking how to define a 32-bit word... $ECHO_C" 1>&6
+echo "configure:2359: checking how to [define] a 32-bit word" 1>&5
+if test "${gmp_cv_check_asm_w32+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$NM"; then
+ echo; echo "configure: GMP_CHECK_ASM_W32: fatal: need nm"
+ exit 1
+fi
+
+# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+# that it's at 0x0. We'll have to declare another symbol before the
+# .long/.word and look at the distance between the two symbols. The
+# only problem is that the sed expression(s) barfs (on Solaris, for
+# example) for the symbol with value 0. For now, HPUX uses .word.
+
+case "$target" in
+ *-*-hpux*)
+ gmp_cv_check_asm_w32=".word"
+ ;;
+ *-*-*)
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+ for gmp_tmp_op in .long .word; do
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ $gmp_cv_check_asm_globl foo
+ $gmp_tmp_op 0
+foo${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if { (eval echo configure:2388: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ if test "$gmp_tmp_val" = "4"; then
+ gmp_cv_check_asm_w32="$gmp_tmp_op"
+ break
+ fi
+ fi
+ done
+ ;;
+esac
+
+if test -z "$gmp_cv_check_asm_w32"; then
+ echo; echo "configure: GMP_CHECK_ASM_W32: fatal: do not know how to define a 32-bit word"
+ exit 1
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_w32" 1>&6
+echo "define(<W32>, <$gmp_cv_check_asm_w32>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking if symbols are prefixed by underscore... $ECHO_C" 1>&6
+echo "configure:2412: checking if symbols are prefixed by underscore" 1>&5
+if test "${gmp_cv_check_asm_underscore+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2417 "configure"
+#include "confdefs.h"
+int underscore_test() {
+return; }
+EOF
+if { (eval echo configure:2422: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ if grep _underscore_test conftest* >/dev/null; then
+ gmp_cv_check_asm_underscore=yes
+ else
+ gmp_cv_check_asm_underscore=no
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_underscore" 1>&6
+if test "$gmp_cv_check_asm_underscore" = "yes"; then
+
+echo 'define(<GSYM_PREFIX>, <_>)' >> $gmp_tmpconfigm4
+
+ underscore=yes
+else
+
+echo 'define(<GSYM_PREFIX>, <>)' >> $gmp_tmpconfigm4
+
+ underscore=no
+fi
+
+echo $ECHO_N "checking if .align assembly directive is logarithmic... $ECHO_C" 1>&6
+echo "configure:2449: checking if .align assembly directive is logarithmic" 1>&5
+if test "${gmp_cv_check_asm_align_log+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$NM"; then
+ echo; echo "configure: GMP_CHECK_ASM_ALIGN_LOG: fatal: need nm"
+ exit 1
+fi
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ .align 4
+ $gmp_cv_check_asm_globl foo
+ .byte 1
+ .align 4
+foo$gmp_cv_check_asm_label_suffix
+ .byte 2
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+if { (eval echo configure:2467: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+ gmp_cv_check_asm_align_log=yes
+ else
+ gmp_cv_check_asm_align_log=no
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.s >&5
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_align_log" 1>&6
+
+echo "define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)" >> $gmp_tmpconfigm4
+
+if test "$gmp_cv_check_asm_align_log" = "yes"; then
+ asm_align=log
+else
+ asm_align=nolog
+fi
+
+fi
+
+family=generic
+
+case ${target} in
+ arm*-*-*)
+ path="arm"
+ ;;
+ sparcv9*-*-solaris2.[789]* | sparc64*-*-solaris2.[789]* | ultrasparc*-*-solaris2.[789]*)
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparc64-*-linux*)
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparcv8*-*-* | microsparc*-*-*)
+ path="sparc32/v8 sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-*)
+ path="sparc32/v9 sparc32/v8 sparc32"
+ extra_functions="udiv_fp"
+ ;;
+ supersparc*-*-*)
+ path="sparc32/v8/supersparc sparc32/v8 sparc32"
+ extra_functions="udiv"
+ ;;
+ sparc*-*-*) path="sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ hppa7000*-*-*)
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa1.0*-*-*)
+ path="hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa2.0w-*-*)
+ path="pa64w"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ ;;
+ hppa2.0*-*-*)
+ if test -n "$CC64"; then
+ path="pa64"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ # We need to use the system compiler, or actually the system assembler,
+ # since GAS has not been ported to understand the 2.0 instructions.
+ CCAS="$CC64 -c"
+ else
+ # FIXME: path should be "hppa/hppa2_0 hppa/hppa1_1 hppa"
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ fi
+ ;;
+ hppa*-*-*) #assume pa7100
+ path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd";;
+ f30[01]-fujitsu-sysv*)
+ path=fujitsu;;
+ alphaev6*-*-*) path="alpha/ev6 alpha"; extra_functions="invert_limb cntlz";;
+ alphaev5*-*-*) path="alpha/ev5 alpha"; extra_functions="invert_limb cntlz";;
+ alpha*-*-*) path="alpha"; extra_functions="invert_limb cntlz";;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ path="cray"
+ extra_functions="mulww";;
+ am29000*-*-*) path="a29k";;
+ a29k*-*-*) path="a29k";;
+
+ # AMD and Intel x86 configurations
+
+ i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*)
+ gmp_m4postinc="x86/x86-defs.m4"
+ extra_functions="udiv umul"
+ CALLING_CONVENTIONS_OBJS="x86call.o x86check.o"
+
+echo $ECHO_N "checking if the assembler takes cl with shldl... $ECHO_C" 1>&6
+echo "configure:2583: checking if the assembler takes cl with shldl" 1>&5
+if test "${gmp_cv_check_asm_shldl_cl+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ shldl %cl, %eax, %ebx
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+if { (eval echo configure:2592: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_shldl_cl=yes
+else
+ gmp_cv_check_asm_shldl_cl=no
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_shldl_cl" 1>&6
+if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then
+
+echo 'define(<WANT_SHLDL_CL>, <1>)' >> $gmp_tmpconfigm4
+
+else
+
+echo 'define(<WANT_SHLDL_CL>, <0>)' >> $gmp_tmpconfigm4
+
+fi
+
+ echo $ECHO_N "checking if the .align directive accepts an 0x90 fill in .text... $ECHO_C" 1>&6
+echo "configure:2612: checking if the .align directive accepts an 0x90 fill in .text" 1>&5
+if test "${gmp_cv_check_asm_align_fill_0x90+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ .align 4, 0x90
+ .byte 0
+ .align 4, 0x90
+EOF
+gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`"
+if test $? = 0; then
+ echo "$gmp_tmp_val" 1>&5
+ if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then
+ echo "Supressing this warning by omitting 0x90" 1>&5
+ gmp_cv_check_asm_align_fill_0x90=no
+ else
+ gmp_cv_check_asm_align_fill_0x90=yes
+ fi
+else
+ echo "Non-zero exit code" 1>&5
+ echo "$gmp_tmp_val" 1>&5
+ gmp_cv_check_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_align_fill_0x90" 1>&6
+
+echo "define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)" >> $gmp_tmpconfigm4
+
+ # the CPUs below wanting to know about mmx
+ case ${target} in
+ pentiummmx-*-* | pentium[23]-*-* | k6*-*-* | athlon-*-*)
+
+echo $ECHO_N "checking if the assembler knows about MMX instructions... $ECHO_C" 1>&6
+echo "configure:2649: checking if the assembler knows about MMX instructions" 1>&5
+if test "${gmp_cv_check_asm_mmx+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ por %mm0, %mm0
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+if { (eval echo configure:2658: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_mmx=yes
+else
+ gmp_cv_check_asm_mmx=no
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_mmx" 1>&6
+if test "$gmp_cv_check_asm_mmx" = "yes"; then
+ tmp_mmx=yes
+else
+ echo "configure: warning: +----------------------------------------------------------" 1>&2
+ echo "configure: warning: | WARNING WARNING WARNING" 1>&2
+ echo "configure: warning: | Target CPU has MMX code, but it can't be assembled by" 1>&2
+ echo "configure: warning: | $CCAS $CFLAGS" 1>&2
+ echo "configure: warning: | Non-MMX replacements will be used." 1>&2
+ echo "configure: warning: | This will be an inferior build." 1>&2
+ echo "configure: warning: +----------------------------------------------------------" 1>&2
+ tmp_mmx=no
+fi
+
+ ;;
+ esac
+
+ # default for anything not otherwise mentioned
+ path="x86"
+
+ case ${target} in
+ i[34]86*-*-*)
+ path="x86"
+ ;;
+ k5*-*-*)
+ # don't know what best suits k5
+ path="x86"
+ ;;
+ i586*-*-* | pentium-*-*)
+ path="x86/pentium x86"
+ ;;
+ pentiummmx-*-*)
+ path="x86/pentium x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/pentium/mmx $path"
+ fi
+ ;;
+ i686*-*-* | pentiumpro-*-*)
+ path="x86/p6 x86"
+ ;;
+ pentium2-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ pentium3-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/p3mmx x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ k6[23]*-*-*)
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/k62mmx x86/k6/mmx $path"
+ fi
+ ;;
+ k6*-*-*)
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/mmx $path"
+ fi
+ ;;
+ athlon-*-*)
+ path="x86/k7 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k7/mmx $path"
+ fi
+ ;;
+ esac
+ ;;
+
+ i960*-*-*) path="i960";;
+
+ ia64*-*-*) path="ia64";;
+
+# Motorola 68k configurations. Let m68k mean 68020-68040.
+ m680[234]0*-*-* | m68k*-*-* | \
+ m68*-next-nextstep*) # Nexts are at least '020
+ path="m68k/mc68020 m68k"
+ family=m68k
+ ;;
+ m68000*-*-*)
+ path="m68k"
+ family=m68k
+ ;;
+
+ m88k*-*-* | m88k*-*-*) path="m88k";;
+ m88110*-*-*) path="m88k/mc88110 m88k";;
+ ns32k*-*-*) path="ns32k";;
+
+ pyramid-*-*) path="pyr";;
+
+ ppc601-*-*) path="power powerpc32";;
+ powerpc64*-*-*) path="powerpc64";;
+ powerpc*-*-*) path="powerpc32";;
+ rs6000-*-* | power-*-* | power2-*-*)
+ path="power"
+ extra_functions="udiv_w_sdiv"
+ ;;
+
+ sh-*-*) path="sh";;
+ sh2-*-*) path="sh/sh2 sh";;
+
+ mips[34]*-*-*) path="mips3";;
+ mips*-*-irix6*) path="mips3";;
+ mips*-*-*) path="mips2";;
+
+ vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv";;
+
+ z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv";;
+ z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv";;
+
+ clipper*-*-*) path="clipper";;
+esac
+
+if test -n "$CALLING_CONVENTIONS_OBJS"; then
+ cat >>confdefs.h <<\EOF
+#define HAVE_CALLING_CONVENTIONS 1
+EOF
+
+fi
+
+case ${target} in
+ i[5-8]86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*)
+ # rdtsc is in pentium and up, not in i386 and i486
+ SPEED_CYCLECOUNTER_OBJS=pentium.lo
+ ;;
+ alpha*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=alpha.lo
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-* | sparc64*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=sparcv9.lo
+ ;;
+ hppa2*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa2.lo
+ ;;
+ hppa*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa.lo
+ ;;
+esac
+
+if test -n "$SPEED_CYCLECOUNTER_OBJS"
+then
+ cat >>confdefs.h <<\EOF
+#define HAVE_SPEED_CYCLECOUNTER 1
+EOF
+
+fi
+
+echo $ECHO_N "checking for Cygwin environment... $ECHO_C" 1>&6
+echo "configure:2822: checking for Cygwin environment" 1>&5
+if test "${ac_cv_cygwin+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 2827 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+#ifndef __CYGWIN__
+# define __CYGWIN__ __CYGWIN32__
+#endif
+return __CYGWIN__;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:2841: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_cygwin=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_cygwin=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_cygwin" 1>&6
+CYGWIN=
+test "$ac_cv_cygwin" = yes && CYGWIN=yes
+echo $ECHO_N "checking for mingw32 environment... $ECHO_C" 1>&6
+echo "configure:2856: checking for mingw32 environment" 1>&5
+if test "${ac_cv_mingw32+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 2861 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+return __MINGW32__;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:2872: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_mingw32=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_mingw32=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_mingw32" 1>&6
+MINGW32=
+test "$ac_cv_mingw32" = yes && MINGW32=yes
+echo $ECHO_N "checking for EMX OS/2 environment... $ECHO_C" 1>&6
+echo "configure:2887: checking for EMX OS/2 environment" 1>&5
+if test "${ac_cv_emxos2+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 2892 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+return __EMX__;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:2903: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_emxos2=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_emxos2=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_emxos2" 1>&6
+EMXOS2=
+test "$ac_cv_emxos2" = yes && EMXOS2=yes
+
+echo $ECHO_N "checking for executable suffix... $ECHO_C" 1>&6
+echo "configure:2919: checking for executable suffix" 1>&5
+if test "${ac_cv_exeext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test "$CYGWIN" = yes || test "$MINGW32" = yes || test "$EMXOS2" = yes; then
+ ac_cv_exeext=.exe
+else
+ rm -f conftest*
+ echo 'int main () { return 0; }' >conftest.$ac_ext
+ ac_cv_exeext=
+ if { (eval echo configure:2929: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then
+ for ac_file in conftest.*; do
+ case $ac_file in
+ *.c | *.C | *.o | *.obj | *.xcoff) ;;
+ *) ac_cv_exeext=`echo $ac_file | sed -e s/conftest//` ;;
+ esac
+ done
+ else
+ { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; }
+ fi
+ rm -f conftest*
+ test x"${ac_cv_exeext}" = x && ac_cv_exeext=no
+fi
+fi
+
+EXEEXT=""
+test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext}
+echo "$ECHO_T""${ac_cv_exeext}" 1>&6
+ac_exeext=$EXEEXT
+
+echo $ECHO_N "checking for object suffix... $ECHO_C" 1>&6
+echo "configure:2950: checking for object suffix" 1>&5
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ rm -f conftest*
+echo 'int i = 1;' >conftest.$ac_ext
+if { (eval echo configure:2956: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ for ac_file in conftest.*; do
+ case $ac_file in
+ *.c) ;;
+ *) ac_cv_objext=`echo $ac_file | sed -e s/conftest.//` ;;
+ esac
+ done
+else
+ { echo "configure: error: installation or configuration problem; compiler does not work" 1>&2; exit 1; }
+fi
+rm -f conftest*
+fi
+
+echo "$ECHO_T""$ac_cv_objext" 1>&6
+OBJEXT=$ac_cv_objext
+ac_objext=$ac_cv_objext
+
+case "$target" in
+ *-*-aix4.[3-9]*) enable_shared=no ;;
+esac
+# Check whether --enable-shared or --disable-shared was given.
+if test "${enable_shared+set}" = set; then
+ enableval="$enable_shared"
+ p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_shared=yes ;;
+no) enable_shared=no ;;
+*)
+ enable_shared=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_shared=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac
+else
+ enable_shared=yes
+fi
+# Check whether --enable-static or --disable-static was given.
+if test "${enable_static+set}" = set; then
+ enableval="$enable_static"
+ p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_static=yes ;;
+no) enable_static=no ;;
+*)
+ enable_static=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_static=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac
+else
+ enable_static=yes
+fi
+# Check whether --enable-fast-install or --disable-fast-install was given.
+if test "${enable_fast_install+set}" = set; then
+ enableval="$enable_fast_install"
+ p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_fast_install=yes ;;
+no) enable_fast_install=no ;;
+*)
+ enable_fast_install=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_fast_install=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac
+else
+ enable_fast_install=yes
+fi
+
+echo $ECHO_N "checking build system type... $ECHO_C" 1>&6
+echo "configure:3044: checking build system type" 1>&5
+if test "x$ac_cv_build" = "x" || (test "x$build" != "xNONE" && test "x$build" != "x$ac_cv_build_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_build_alias=$build
+ case "$ac_cv_build_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ ac_cv_build_alias=$host_alias ;;
+ *) ac_cv_build_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || exit 1
+ ac_cv_build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_build" 1>&6
+
+build=$ac_cv_build
+build_alias=$ac_cv_build_alias
+build_cpu=$ac_cv_build_cpu
+build_vendor=$ac_cv_build_vendor
+build_os=$ac_cv_build_os
+
+# Check whether --with-gnu-ld or --without-gnu-ld was given.
+if test "${with_gnu_ld+set}" = set; then
+ withval="$with_gnu_ld"
+ test "$withval" = no || with_gnu_ld=yes
+else
+ with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$ac_cv_prog_gcc" = yes; then
+ # Check if gcc -print-prog-name=ld gives a path.
+ echo $ECHO_N "checking for ld used by GCC... $ECHO_C" 1>&6
+echo "configure:3090: checking for ld used by GCC" 1>&5
+ case $target in
+ *-*-mingw*)
+ # gcc leaves a trailing carriage return which upsets mingw
+ ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+ *)
+ ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+ esac
+ case "$ac_prog" in
+ # Accept absolute paths.
+ [\\/]* | [A-Za-z]:[\\/]*)
+ re_direlt='/[^/][^/]*/\.\./'
+ # Canonicalize the path of ld
+ ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
+ while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+ ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"`
+ done
+ test -z "$LD" && LD="$ac_prog"
+ ;;
+ "")
+ # If it fails, then pretend we aren't using GCC.
+ ac_prog=ld
+ ;;
+ *)
+ # If it is relative, then search for the first ld in PATH.
+ with_gnu_ld=unknown
+ ;;
+ esac
+elif test "$with_gnu_ld" = yes; then
+ echo $ECHO_N "checking for GNU ld... $ECHO_C" 1>&6
+echo "configure:3120: checking for GNU ld" 1>&5
+else
+ echo $ECHO_N "checking for non-GNU ld... $ECHO_C" 1>&6
+echo "configure:3123: checking for non-GNU ld" 1>&5
+fi
+if test "${ac_cv_path_LD+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$LD"; then
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+ ac_cv_path_LD="$ac_dir/$ac_prog"
+ # Check to see if the program is GNU ld. I'd rather use --version,
+ # but apparently some GNU ld's only accept -v.
+ # Break only if it was the GNU/non-GNU ld that we prefer.
+ if "$ac_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
+ test "$with_gnu_ld" != no && break
+ else
+ test "$with_gnu_ld" != yes && break
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+else
+ ac_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$ac_cv_path_LD"
+if test -n "$LD"; then
+ echo "$ECHO_T""$LD" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+test -z "$LD" && { echo "configure: error: no acceptable ld found in \$PATH" 1>&2; exit 1; }
+echo $ECHO_N "checking if the linker ($LD) is GNU ld... $ECHO_C" 1>&6
+echo "configure:3158: checking if the linker ($LD) is GNU ld" 1>&5
+if test "${ac_cv_prog_gnu_ld+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ # I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+ ac_cv_prog_gnu_ld=yes
+else
+ ac_cv_prog_gnu_ld=no
+fi
+fi
+echo "$ECHO_T""$ac_cv_prog_gnu_ld" 1>&6
+with_gnu_ld=$ac_cv_prog_gnu_ld
+
+echo $ECHO_N "checking for $LD option to reload object files... $ECHO_C" 1>&6
+echo "configure:3173: checking for $LD option to reload object files" 1>&5
+if test "${lt_cv_ld_reload_flag+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ lt_cv_ld_reload_flag='-r'
+fi
+echo "$ECHO_T""$lt_cv_ld_reload_flag" 1>&6
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+
+echo $ECHO_N "checking how to recognise dependant libraries... $ECHO_C" 1>&6
+echo "configure:3184: checking how to recognise dependant libraries" 1>&5
+if test "${lt_cv_deplibs_check_method+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ lt_cv_file_magic_cmd='${MAGIC}'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [regex]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case "$host_os" in
+aix4* | beos*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+bsdi4*)
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ ;;
+
+cygwin* | mingw*)
+ lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+ lt_cv_file_magic_cmd='${OBJDUMP} -f'
+ ;;
+
+freebsd*)
+ case "$version_type" in
+ freebsd-elf*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ esac
+ ;;
+
+gnu*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+irix5* | irix6*)
+ case "$host_os" in
+ irix5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
+ ;;
+ *)
+ case "$LD" in
+ *-32|*"-32 ") libmagic=32-bit;;
+ *-n32|*"-n32 ") libmagic=N32;;
+ *-64|*"-64 ") libmagic=64-bit;;
+ *) libmagic=never-match;;
+ esac
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"
+ ;;
+ esac
+ lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+ case "$host_cpu" in
+ alpha* | i*86 | powerpc* | sparc* )
+ lt_cv_deplibs_check_method=pass_all ;;
+ *)
+ # glibc up to 2.1.1 does not perform some relocations on ARM
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;
+ esac
+ lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
+ ;;
+
+osf3* | osf4* | osf5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+sco3.2v5*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+solaris*)
+ lt_cv_deplibs_check_method=pass_all
+ lt_cv_file_magic_test_file=/lib/libc.so
+ ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ case "$host_vendor" in
+ ncr)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ motorola)
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+ ;;
+ esac
+ ;;
+esac
+
+fi
+echo "$ECHO_T""$lt_cv_deplibs_check_method" 1>&6
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+
+if test $host != $build; then
+ ac_tool_prefix=${host_alias}-
+else
+ ac_tool_prefix=
+fi
+
+# Only perform the check for file, if the check method requires it
+case "$deplibs_check_method" in
+file_magic*)
+ if test "$file_magic_cmd" = '${MAGIC}'; then
+
+echo $ECHO_N "checking for ${ac_tool_prefix}file... $ECHO_C" 1>&6
+echo "configure:3308: checking for ${ac_tool_prefix}file" 1>&5
+if test "${lt_cv_path_MAGIC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$MAGIC" in
+ /*)
+ lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path.
+ ;;
+ ?:/*)
+ ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path.
+ ;;
+ *)
+ ac_save_MAGIC="$MAGIC"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ ac_dummy="/usr/bin:$PATH"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/${ac_tool_prefix}file; then
+ lt_cv_path_MAGIC="$ac_dir/${ac_tool_prefix}file"
+ if test -n "$file_magic_test_file"; then
+ case "$deplibs_check_method" in
+ "file_magic "*)
+ file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+ MAGIC="$lt_cv_path_MAGIC"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ egrep "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ MAGIC="$ac_save_MAGIC"
+ ;;
+esac
+fi
+
+MAGIC="$lt_cv_path_MAGIC"
+if test -n "$MAGIC"; then
+ echo "$ECHO_T""$MAGIC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$lt_cv_path_MAGIC"; then
+ if test -n "$ac_tool_prefix"; then
+ echo $ECHO_N "checking for file... $ECHO_C" 1>&6
+echo "configure:3370: checking for file" 1>&5
+if test "${lt_cv_path_MAGIC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$MAGIC" in
+ /*)
+ lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path.
+ ;;
+ ?:/*)
+ ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path.
+ ;;
+ *)
+ ac_save_MAGIC="$MAGIC"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ ac_dummy="/usr/bin:$PATH"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/file; then
+ lt_cv_path_MAGIC="$ac_dir/file"
+ if test -n "$file_magic_test_file"; then
+ case "$deplibs_check_method" in
+ "file_magic "*)
+ file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+ MAGIC="$lt_cv_path_MAGIC"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ egrep "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ MAGIC="$ac_save_MAGIC"
+ ;;
+esac
+fi
+
+MAGIC="$lt_cv_path_MAGIC"
+if test -n "$MAGIC"; then
+ echo "$ECHO_T""$MAGIC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ MAGIC=:
+ fi
+fi
+
+ fi
+ ;;
+esac
+
+case "$target" in
+NONE) lt_target="$host" ;;
+*) lt_target="$target" ;;
+esac
+
+# Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3446: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$RANLIB"; then
+ ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+ break
+ done
+fi
+fi
+RANLIB="$ac_cv_prog_RANLIB"
+if test -n "$RANLIB"; then
+ echo "$ECHO_T""$RANLIB" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$ac_cv_prog_RANLIB"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3480: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$RANLIB"; then
+ ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_RANLIB="ranlib"
+ break
+ done
+ test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":"
+fi
+fi
+RANLIB="$ac_cv_prog_RANLIB"
+if test -n "$RANLIB"; then
+ echo "$ECHO_T""$RANLIB" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ RANLIB=":"
+ fi
+fi
+
+# Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3518: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_STRIP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$STRIP"; then
+ ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+ break
+ done
+fi
+fi
+STRIP="$ac_cv_prog_STRIP"
+if test -n "$STRIP"; then
+ echo "$ECHO_T""$STRIP" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$ac_cv_prog_STRIP"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3552: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_STRIP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$STRIP"; then
+ ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_STRIP="strip"
+ break
+ done
+ test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":"
+fi
+fi
+STRIP="$ac_cv_prog_STRIP"
+if test -n "$STRIP"; then
+ echo "$ECHO_T""$STRIP" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ STRIP=":"
+ fi
+fi
+
+# Check for any special flags to pass to ltconfig.
+libtool_flags="--cache-file=$cache_file"
+test "$enable_shared" = no && libtool_flags="$libtool_flags --disable-shared"
+test "$enable_static" = no && libtool_flags="$libtool_flags --disable-static"
+test "$enable_fast_install" = no && libtool_flags="$libtool_flags --disable-fast-install"
+test "$ac_cv_prog_gcc" = yes && libtool_flags="$libtool_flags --with-gcc"
+test "$ac_cv_prog_gnu_ld" = yes && libtool_flags="$libtool_flags --with-gnu-ld"
+
+# Check whether --enable-libtool-lock or --disable-libtool-lock was given.
+if test "${enable_libtool_lock+set}" = set; then
+ enableval="$enable_libtool_lock"
+
+fi
+test "x$enable_libtool_lock" = xno && libtool_flags="$libtool_flags --disable-lock"
+test x"$silent" = xyes && libtool_flags="$libtool_flags --silent"
+
+# Check whether --with-pic or --without-pic was given.
+if test "${with_pic+set}" = set; then
+ withval="$with_pic"
+ pic_mode="$withval"
+else
+ pic_mode=default
+fi
+test x"$pic_mode" = xyes && libtool_flags="$libtool_flags --prefer-pic"
+test x"$pic_mode" = xno && libtool_flags="$libtool_flags --prefer-non-pic"
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case "$lt_target" in
+*-*-irix6*)
+ # Find out which ABI we are using.
+ echo '#line 3618 "configure"' > conftest.$ac_ext
+ if { (eval echo configure:3619: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ case "`/usr/bin/file conftest.o`" in
+ *32-bit*)
+ LD="${LD-ld} -32"
+ ;;
+ *N32*)
+ LD="${LD-ld} -n32"
+ ;;
+ *64-bit*)
+ LD="${LD-ld} -64"
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+
+*-*-sco3.2v5*)
+ # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -belf"
+ echo $ECHO_N "checking whether the C compiler needs -belf... $ECHO_C" 1>&6
+echo "configure:3640: checking whether the C compiler needs -belf" 1>&5
+if test "${lt_cv_cc_needs_belf+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ cat >conftest.$ac_ext <<EOF
+#line 3653 "configure"
+#include "confdefs.h"
+
+int
+main()
+{
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:3664: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ lt_cv_cc_needs_belf=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ lt_cv_cc_needs_belf=no
+fi
+rm -f conftest*
+
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+fi
+echo "$ECHO_T""$lt_cv_cc_needs_belf" 1>&6
+ if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+ # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+ CFLAGS="$SAVE_CFLAGS"
+ fi
+ ;;
+
+esac
+
+# Save cache, so that ltconfig can load it
+cat >confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote substitution
+ # turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ -e "s/'/'\\\\''/g" \
+ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+ ;;
+ esac >>confcache
+if cmp -s $cache_file confcache; then :; else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+# Actually configure libtool. ac_aux_dir is where install-sh is found.
+AR="$AR" CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" \
+MAGIC="$MAGIC" LD="$LD" LDFLAGS="$LDFLAGS" LIBS="$LIBS" \
+LN_S="$LN_S" NM="$NM" RANLIB="$RANLIB" STRIP="$STRIP" \
+AS="$AS" DLLTOOL="$DLLTOOL" OBJDUMP="$OBJDUMP" \
+objext="$OBJEXT" exeext="$EXEEXT" reload_flag="$reload_flag" \
+deplibs_check_method="$deplibs_check_method" file_magic_cmd="$file_magic_cmd" \
+${CONFIG_SHELL-/bin/sh} $ac_aux_dir/ltconfig --no-reexec \
+$libtool_flags --no-verify --build="$build" $ac_aux_dir/ltmain.sh $lt_target \
+|| { echo "configure: error: libtool configure failed" 1>&2; exit 1; }
+
+# Reload cache, that may have been modified by ltconfig
+if test -r "$cache_file"; then
+ echo "loading cache $cache_file"
+ test -f "$cache_file" && . $cache_file
+else
+ echo "creating cache $cache_file"
+ >$cache_file
+fi
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltconfig $ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+# Redirect the config.log output again, so that the ltconfig log is not
+# clobbered by the next message.
+exec 5>>./config.log
+
+echo $ECHO_N "checking whether optarg is declared... $ECHO_C" 1>&6
+echo "configure:3769: checking whether optarg is declared" 1>&5
+if test "${ac_cv_have_decl_optarg+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 3774 "configure"
+#include "confdefs.h"
+$ac_includes_default
+int
+main ()
+{
+#ifndef optarg
+ char *p = (char *) optarg;
+#endif
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:3788: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_have_decl_optarg=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_have_decl_optarg=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_have_decl_optarg" 1>&6
+if test $ac_cv_have_decl_optarg = yes; then
+ cat >>confdefs.h <<EOF
+#define HAVE_DECL_OPTARG 1
+EOF
+
+else
+ cat >>confdefs.h <<EOF
+#define HAVE_DECL_OPTARG 0
+EOF
+
+fi
+
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" 1>&6
+echo "configure:3813: checking for ANSI C header files" 1>&5
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 3819 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:3828: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ ac_cv_header_stdc=yes
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 3846 "configure"
+#include "confdefs.h"
+#include <string.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "memchr" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 3866 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "free" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<EOF
+#line 3888 "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+EOF
+if { (eval echo configure:3913: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+then
+ :
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ ac_cv_header_stdc=no
+fi
+rm -fr conftest*
+
+fi
+
+fi
+fi
+echo "$ECHO_T""$ac_cv_header_stdc" 1>&6
+if test $ac_cv_header_stdc = yes; then
+ cat >>confdefs.h <<\EOF
+#define STDC_HEADERS 1
+EOF
+
+fi
+
+for ac_header in getopt.h unistd.h sys/sysctl.h sys/time.h
+do
+ac_ac_Header=`echo "ac_cv_header_$ac_header" | $ac_tr_sh`
+echo $ECHO_N "checking for $ac_header... $ECHO_C" 1>&6
+echo "configure:3940: checking for $ac_header" 1>&5
+if eval "test \"\${$ac_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 3946 "configure"
+#include "confdefs.h"
+#include <$ac_header>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:3952: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "$ac_ac_Header=yes"
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "$ac_ac_Header=no"
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""`eval echo '${'$ac_ac_Header'}'`" 1>&6
+if test `eval echo '${'$ac_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<EOF
+#define `echo "HAVE_$ac_header" | $ac_tr_cpp` 1
+EOF
+
+fi
+done
+
+echo $ECHO_N "checking for void... $ECHO_C" 1>&6
+echo "configure:3976: checking for void" 1>&5
+if test "${ac_cv_type_void+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 3981 "configure"
+#include "confdefs.h"
+$ac_includes_default
+int
+main ()
+{
+if ((void *) 0)
+ return 0;
+if (sizeof (void))
+ return 0;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:3995: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_type_void=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_type_void=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_type_void" 1>&6
+if test $ac_cv_type_void = yes; then
+ cat >>confdefs.h <<EOF
+#define HAVE_VOID 1
+EOF
+
+fi
+
+echo $ECHO_N "checking for preprocessor stringizing operator... $ECHO_C" 1>&6
+echo "configure:4015: checking for preprocessor stringizing operator" 1>&5
+if test "${ac_cv_c_stringize+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 4021 "configure"
+#include "confdefs.h"
+
+#define x(y) #y
+
+char *s = x(teststring);
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "#teststring" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_c_stringize=no
+else
+ rm -rf conftest*
+ ac_cv_c_stringize=yes
+fi
+rm -f conftest*
+
+fi
+
+if test "${ac_cv_c_stringize}" = yes; then
+ cat >>confdefs.h <<\EOF
+#define HAVE_STRINGIZE 1
+EOF
+
+fi
+echo "$ECHO_T""${ac_cv_c_stringize}" 1>&6
+
+for ac_func in getopt_long getpagesize popen processor_info strtoul sysconf sysctlbyname
+do
+ac_ac_var=`echo "ac_cv_func_$ac_func" | $ac_tr_sh`
+echo $ECHO_N "checking for $ac_func... $ECHO_C" 1>&6
+echo "configure:4053: checking for $ac_func" 1>&5
+if eval "test \"\${$ac_ac_var+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 4058 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+char $ac_func();
+char (*f)();
+
+int
+main()
+{
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
+choke me
+#else
+f = $ac_func;
+#endif
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:4086: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ eval "$ac_ac_var=yes"
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "$ac_ac_var=no"
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""`eval echo '${'$ac_ac_var'}'`" 1>&6
+if test `eval echo '${'$ac_ac_var'}'` = yes; then
+ cat >>confdefs.h <<EOF
+#define `echo "HAVE_$ac_func" | $ac_tr_cpp` 1
+EOF
+
+fi
+done
+
+echo $ECHO_N "checking if ansi2knr should be used... $ECHO_C" 1>&6
+echo "configure:4108: checking if ansi2knr should be used" 1>&5
+if test "${gmp_cv_c_ansi2knr+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.c <<EOF
+int main (int argc, char *argv) { return 0; }
+EOF
+if { (eval echo configure:4115: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ gmp_cv_c_ansi2knr=no
+else
+ gmp_cv_c_ansi2knr=yes
+fi
+rm -f conftest.*
+
+fi
+echo "$ECHO_T""$gmp_cv_c_ansi2knr" 1>&6
+if test $gmp_cv_c_ansi2knr = no; then
+ U= ANSI2KNR=
+else
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" 1>&6
+echo "configure:4131: checking for ANSI C header files" 1>&5
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 4137 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:4146: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ ac_cv_header_stdc=yes
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 4164 "configure"
+#include "confdefs.h"
+#include <string.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "memchr" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 4184 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "free" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<EOF
+#line 4206 "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+EOF
+if { (eval echo configure:4231: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+then
+ :
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ ac_cv_header_stdc=no
+fi
+rm -fr conftest*
+
+fi
+
+fi
+fi
+echo "$ECHO_T""$ac_cv_header_stdc" 1>&6
+if test $ac_cv_header_stdc = yes; then
+ cat >>confdefs.h <<\EOF
+#define STDC_HEADERS 1
+EOF
+
+fi
+
+ for ac_header in string.h
+do
+ac_ac_Header=`echo "ac_cv_header_$ac_header" | $ac_tr_sh`
+echo $ECHO_N "checking for $ac_header... $ECHO_C" 1>&6
+echo "configure:4258: checking for $ac_header" 1>&5
+if eval "test \"\${$ac_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 4264 "configure"
+#include "confdefs.h"
+#include <$ac_header>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:4270: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "$ac_ac_Header=yes"
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "$ac_ac_Header=no"
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""`eval echo '${'$ac_ac_Header'}'`" 1>&6
+if test `eval echo '${'$ac_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<EOF
+#define `echo "HAVE_$ac_header" | $ac_tr_cpp` 1
+EOF
+
+fi
+done
+
+fi
+
+syntax=
+# For now, we use the old switch for setting syntax.
+# FIXME: Remove when conversion to .asm is completed.
+case "${target}" in
+ m680[234]0*-*-linuxaout* | m68k*-*-linuxaout* | \
+ m68k-next-nextstep* | \
+ m68000*-*-*)
+ syntax=mit
+ ;;
+ m680[234]0*-*-linux* | m68k*-*-linux*)
+ syntax=elf
+ ;;
+ m680[234]0*-*-* | m68k*-*-*)
+ syntax=mit
+ ;;
+esac
+
+# Now build an asm-syntax.h file for targets that include that from the
+# assembly files.
+# FIXME: Remove when conversion to .asm is completed.
+case "${family}-${underscore}-${asm_align}-${syntax}" in
+ m68k-yes-log-mit)
+ echo '#define MIT_SYNTAX' >asm-syntax.h
+ cat $srcdir/mpn/underscore.h >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+ m68k-no-nolog-elf)
+ echo '#define ELF_SYNTAX' >asm-syntax.h
+ echo '#define C_SYMBOL_NAME(name) name' >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+esac
+
+# The pattern here tests for an absolute path the same way as
+# _AC_OUTPUT_FILES in autoconf acgeneral.m4.
+
+echo "dnl CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir" >> $gmp_tmpconfigm4
+
+case "$srcdir" in
+[\\/]* | ?:[\\/]* )
+
+echo "define(<CONFIG_TOP_SRCDIR>,<\`$srcdir'>)" >> $gmp_tmpconfigm4
+ ;;
+*)
+
+echo "define(<CONFIG_TOP_SRCDIR>,<\`../$srcdir'>)" >> $gmp_tmpconfigm4
+ ;;
+esac
+
+echo "include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')" >> $gmp_tmpconfigm4p
+
+# Must be after asm-defs.m4
+
+echo "define_not_for_expansion(\`HAVE_TARGET_CPU_$target_cpu')" >> $gmp_tmpconfigm4p
+
+case "$target" in
+ alpha*-cray-unicos*)
+ gmp_m4postinc="alpha/unicos.m4"
+ ;;
+ alpha*-*-*)
+ gmp_m4postinc="alpha/default.m4"
+ ;;
+ power*-*-*)
+ case "$target" in
+ *-*-mach* | *-*-rhapsody* | *-*-nextstep* | *-*-darwin* | *-*-macosx*)
+ ;; # these use non-conventional assembly syntax.
+ powerpc64-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc64/aix.m4"
+ ;;
+ *-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc32/aix.m4"
+ ;;
+ *)
+ gmp_m4postinc="powerpc32/regmap.m4"
+ ;;
+ esac
+ ;;
+esac
+
+for tmp_f in $gmp_m4postinc; do
+
+echo "include_mpn(\`$tmp_f')" >> $gmp_tmpconfigm4p
+
+done
+
+# Set up `gmp_links'. It's a list of link:file pairs that configure will
+# process to create link -> file.
+gmp_links=
+
+# If the user specified `MPN_PATH', use that instead of the path we've
+# come up with.
+if test -z "$MPN_PATH"; then
+ path="$path generic"
+else
+ path="$MPN_PATH"
+fi
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension. Certain multi-function files with special names
+# can provide some functions too. (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# FIXME: udiv and umul aren't in $gmp_mpn_functions_optional yet since
+# there's some versions of those files which should be checked for bit
+# rot first. Put them in $extra_functions for each target for now,
+# change to standard optionals when all are ready.
+
+# Note: The following lines defining $gmp_mpn_functions_optional
+# and $gmp_mpn_functions are parsed by the "macos/configure"
+# Perl script. So if you change the lines in a major way
+# make sure to run and examine the output from
+#
+# % (cd macos; perl configure)
+
+gmp_mpn_functions_optional="copyi copyd com_n \
+ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n"
+
+gmp_mpn_functions="${extra_functions} inlines add_n sub_n mul_1 addmul_1 \
+ submul_1 lshift rshift diveby3 divrem divrem_1 divrem_2 \
+ mod_1 mod_1_rs pre_mod_1 dump \
+ mul mul_fft mul_n mul_basecase sqr_basecase random \
+ random2 sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \
+ bdivmod gcd_1 gcd gcdext tdiv_qr bz_divrem_n sb_divrem_mn jacbase \
+ $gmp_mpn_functions_optional"
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp="mpn/mp_bases.lo"
+
+# SLPJ trace
+echo "Peering at file structure (takes a while)..." 1>&6
+
+for tmp_fn in ${gmp_mpn_functions} ; do
+# SLPJ trace
+ echo "...$tmp_fn..." 1>&6
+
+# This line was
+# rm -f mpn/${tmp_fn}.[Ssc] mpn/${tmp_fn}.asm
+# but I found that on my NT workstation the command
+# would unpredictably hang. rm wasn't an active process,
+# but absolutlely nothing was happening.
+# I *think* that expanding the [Ssc] cures the problem
+# SLPJ May 01
+ rm -f mpn/${tmp_fn}.S mpn/${tmp_fn}.s mpn/${tmp_fn}.c mpn/${tmp_fn}.asm
+
+ echo "...$tmp_fn (done rm)..." 1>&6
+
+ # functions that can be provided by multi-function files
+ tmp_mulfunc=
+ case $tmp_fn in
+ add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+ popcount|hamdist) tmp_mulfunc="popham" ;;
+ and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+ tmp_mulfunc="logops_n" ;;
+ esac
+
+ found=no
+ for tmp_dir in $path; do
+
+# SLPJ trace
+# We get stuck sometimes
+ echo " ...dir $tmp_dir..." 1>&6
+ for tmp_base in $tmp_fn $tmp_mulfunc; do
+
+# SLPJ trace
+# We get stuck sometimes
+ echo " ...base $tmp_base..." 1>&6
+ for tmp_ext in asm S s c; do
+ tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+
+# SLPJ trace
+# We get stuck sometimes
+ echo " ...$tmp_file..." 1>&6
+
+ if test -f $tmp_file; then
+ found=yes
+
+ mpn_objects="$mpn_objects ${tmp_fn}.lo"
+ mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_fn}.lo"
+ gmp_links="$gmp_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext"
+
+ # duplicate AC_DEFINEs are harmless, so it doesn't matter
+ # that multi-function files get grepped here repeatedly
+ gmp_ep="`
+ sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+ sed -n 's/^[ ]*PROLOGUE.*(\(.*\))/\1/p' $tmp_file
+ `"
+ for gmp_tmp in $gmp_ep; do
+ cat >>confdefs.h <<EOF
+#define HAVE_NATIVE_${gmp_tmp} 1
+EOF
+
+ done
+
+ break
+ fi
+ done
+ if test $found = yes; then break ; fi
+ done
+ if test $found = yes; then break ; fi
+ done
+
+ if test $found = no; then
+ for tmp_optional in $gmp_mpn_functions_optional; do
+ if test $tmp_optional = $tmp_fn; then
+ found=yes
+ fi
+ done
+ if test $found = no; then
+ { echo "configure: error: no version of $tmp_fn found in path: $path" 1>&2; exit 1; }
+ fi
+ fi
+done
+
+
+# Create link for gmp-mparam.h.
+
+# SLPJ trace
+echo "Creating link for gmp-mparam.h..." 1>&6
+
+for tmp_dir in $path ; do
+ rm -f gmp-mparam.h
+ if test -f $srcdir/mpn/${tmp_dir}/gmp-mparam.h ; then
+ gmp_links="$gmp_links gmp-mparam.h:mpn/${tmp_dir}/gmp-mparam.h"
+
+ # Copy any KARATSUBA_SQR_THRESHOLD in gmp-mparam.h to config.m4.
+ # Some versions of sqr_basecase.asm use this.
+ tmp_gmp_karatsuba_sqr_threshold="`sed -n 's/^#define KARATSUBA_SQR_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $srcdir/mpn/${tmp_dir}/gmp-mparam.h`"
+ if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+
+echo "define(<KARATSUBA_SQR_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)" >> $gmp_tmpconfigm4
+
+ fi
+
+ break
+ fi
+done
+
+# SLPJ trace
+echo "Digging out links to include in DISTCLEANFILES..." 1>&6
+
+# Dig out the links from `gmp_links' for inclusion in DISTCLEANFILES.
+gmp_srclinks=
+for f in $gmp_links; do
+ gmp_srclinks="$gmp_srclinks `echo $f | sed 's/\(.*\):.*/\1/'`"
+done
+
+echo "creating $gmp_configm4"
+echo "dnl $gmp_configm4. Generated automatically by configure." > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+ echo "changequote(<,>)dnl" >> $gmp_configm4
+ echo "ifdef(<__CONFIG_M4_INCLUDED__>,,<" >> $gmp_configm4
+ cat $gmp_tmpconfigm4 >> $gmp_configm4
+ echo ">)" >> $gmp_configm4
+ echo "changequote(\`,')dnl" >> $gmp_configm4
+ rm $gmp_tmpconfigm4
+fi
+echo "ifdef(\`__CONFIG_M4_INCLUDED__',,\`" >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+ cat $gmp_tmpconfigm4i >> $gmp_configm4
+ rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+ cat $gmp_tmpconfigm4p >> $gmp_configm4
+ rm $gmp_tmpconfigm4p
+fi
+echo "')" >> $gmp_configm4
+echo "define(\`__CONFIG_M4_INCLUDED__')" >> $gmp_configm4
+
+trap '' 1 2 15
+cat >confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote substitution
+ # turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ -e "s/'/'\\\\''/g" \
+ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+ ;;
+ esac >>confcache
+if cmp -s $cache_file confcache; then :; else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# Any assignment to VPATH causes Sun make to only execute
+# the first set of double-colon rules, so remove it if not needed.
+# If there is a colon in the path, we need to keep it.
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d'
+fi
+
+DEFS=-DHAVE_CONFIG_H
+
+: ${CONFIG_STATUS=./config.status}
+trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15
+echo creating $CONFIG_STATUS
+cat >$CONFIG_STATUS <<EOF
+#! /bin/sh
+# Generated automatically by configure.
+# Run this file to recreate the current configuration.
+# This directory was configured as follows,
+# on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+#
+# $0 $ac_configure_args
+#
+# Compiler output produced by configure, useful for debugging
+# configure, is in ./config.log if it exists.
+
+# Files that config.status was made for.
+config_files="\\
+ Makefile mpn/Makefile mpz/Makefile"
+config_headers="\\
+ config.h:config.in"
+config_links="\\
+ $gmp_links"
+config_commands="\\
+ default-1"
+
+ac_cs_usage="\\
+\\\`$CONFIG_STATUS' instantiates files from templates according to the
+current configuration.
+
+Usage: $CONFIG_STATUS [OPTIONS] FILE...
+
+ --recheck Update $CONFIG_STATUS by reconfiguring in the same conditions
+ --version Print the version of Autoconf and exit
+ --help Display this help and exit
+ --file=FILE[:TEMPLATE]
+ Instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ Instantiate the configuration header FILE
+
+Configuration files:
+\$config_files
+
+Configuration headers:
+\$config_headers
+
+Configuration links:
+\$config_links
+
+Configuration commands:
+\$config_commands
+
+Report bugs to <bug-autoconf@gnu.org>."
+
+ac_cs_version="\\
+$CONFIG_STATUS generated by autoconf version 2.14a.
+Configured on host `(hostname || uname -n) 2>/dev/null | sed 1q` by
+ `echo "$0 $ac_configure_args" | sed 's/[\\"\`\$]/\\\\&/g'`"
+
+# Root of the tmp file names. Use pid to allow concurrent executions.
+ac_cs_root=cs\$\$
+ac_given_srcdir=$srcdir
+ac_given_INSTALL="$INSTALL"
+
+# If no file are specified by the user, then we need to provide default
+# value. By we need to know if files were specified by the user.
+ac_need_defaults=:
+while test \$# != 0
+do
+ case "\$1" in
+ --*=*)
+ ac_option=\`echo "\$1" | sed -e 's/=.*//'\`
+ ac_optarg=\`echo "\$1" | sed -e 's/[^=]*=//'\`
+ shift
+ set dummy "\$ac_option" "\$ac_optarg" \${1+"\$@"}
+ shift
+ ;;
+ -*);;
+ *) # This is not an option, so the user has probably given explicit
+ # arguments.
+ ac_need_defaults=false;;
+ esac
+
+ case "\$1" in
+
+ # Handling of the options.
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ echo "running \${CONFIG_SHELL-/bin/sh} $0 `echo "$ac_configure_args" | sed 's/[\\"\`\$]/\\\\&/g'` --no-create --no-recursion"
+ exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;;
+ -version | --version | --versio | --versi | --vers | --ver | --ve | --v)
+ echo "\$ac_cs_version"; exit 0 ;;
+ --he | --h)
+ # Conflict between --help and --header
+ echo "$CONFIG_STATUS: ambiguous option: \$ac_option
+Try \\\`$CONFIG_STATUS --help' for more information."; exit 1 ;;
+ -help | --help | --hel )
+ echo "\$ac_cs_usage"; exit 0 ;;
+ --file | --fil | --fi | --f )
+ shift
+ CONFIG_FILES="\$CONFIG_FILES \$1"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ shift
+ CONFIG_HEADERS="\$CONFIG_FILES \$1"
+ ac_need_defaults=false;;
+
+ # Handling of arguments.
+ 'Makefile' ) CONFIG_FILES="\$CONFIG_FILES Makefile" ;;
+ 'mpz/Makefile' ) CONFIG_FILES="\$CONFIG_FILES mpz/Makefile" ;;
+ 'mpn/Makefile' ) CONFIG_FILES="\$CONFIG_FILES mpn/Makefile" ;;
+ '$gmp_links' ) CONFIG_LINKS="\$CONFIG_LINKS $gmp_links" ;;
+ 'default-1' ) CONFIG_COMMANDS="\$CONFIG_COMMANDS default-1" ;;
+ 'config.h' ) CONFIG_HEADERS="\$CONFIG_HEADERS config.h:config.in" ;;
+
+ # This is an error.
+ -*) echo "$CONFIG_STATUS: unrecognized option: \$1
+Try \\\`$CONFIG_STATUS --help' for more information."; exit 1 ;;
+ *) echo "$CONFIG_STATUS: invalid argument: \$1"; exit 1 ;;
+ esac
+ shift
+done
+
+EOF
+
+cat >>$CONFIG_STATUS <<\EOF
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+if $ac_need_defaults; then
+ : ${CONFIG_FILES=$config_files}
+ : ${CONFIG_HEADERS=$config_headers}
+ : ${CONFIG_LINKS=$config_links}
+ : ${CONFIG_COMMANDS=$config_commands}
+fi
+
+# Trap to remove the temp files.
+trap 'rm -fr $ac_cs_root*; exit 1' 1 2 15
+
+EOF
+
+cat >>$CONFIG_STATUS <<EOF
+#
+# INIT-COMMANDS section.
+#
+
+EOF
+
+cat >>$CONFIG_STATUS <<EOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+ # Protect against being on the right side of a sed subst in config.status.
+ sed 's/%@/@@/; s/@%/@@/; s/%;t t\$/@;t t/; /@;t t\$/s/[\\\\&%]/\\\\&/g;
+ s/@@/%@/; s/@@/@%/; s/@;t t\$/%;t t/' >\$ac_cs_root.subs <<\\CEOF
+s%@exec_prefix@%$exec_prefix%;t t
+s%@prefix@%$prefix%;t t
+s%@program_transform_name@%$program_transform_name%;t t
+s%@bindir@%$bindir%;t t
+s%@sbindir@%$sbindir%;t t
+s%@libexecdir@%$libexecdir%;t t
+s%@datadir@%$datadir%;t t
+s%@sysconfdir@%$sysconfdir%;t t
+s%@sharedstatedir@%$sharedstatedir%;t t
+s%@localstatedir@%$localstatedir%;t t
+s%@libdir@%$libdir%;t t
+s%@includedir@%$includedir%;t t
+s%@oldincludedir@%$oldincludedir%;t t
+s%@infodir@%$infodir%;t t
+s%@mandir@%$mandir%;t t
+s%@SHELL@%$SHELL%;t t
+s%@ECHO_C@%$ECHO_C%;t t
+s%@ECHO_N@%$ECHO_N%;t t
+s%@ECHO_T@%$ECHO_T%;t t
+s%@CFLAGS@%$CFLAGS%;t t
+s%@CPPFLAGS@%$CPPFLAGS%;t t
+s%@CXXFLAGS@%$CXXFLAGS%;t t
+s%@FFLAGS@%$FFLAGS%;t t
+s%@DEFS@%$DEFS%;t t
+s%@LDFLAGS@%$LDFLAGS%;t t
+s%@LIBS@%$LIBS%;t t
+s%@host@%$host%;t t
+s%@host_alias@%$host_alias%;t t
+s%@host_cpu@%$host_cpu%;t t
+s%@host_vendor@%$host_vendor%;t t
+s%@host_os@%$host_os%;t t
+s%@target@%$target%;t t
+s%@target_alias@%$target_alias%;t t
+s%@target_cpu@%$target_cpu%;t t
+s%@target_vendor@%$target_vendor%;t t
+s%@target_os@%$target_os%;t t
+s%@build@%$build%;t t
+s%@build_alias@%$build_alias%;t t
+s%@build_cpu@%$build_cpu%;t t
+s%@build_vendor@%$build_vendor%;t t
+s%@build_os@%$build_os%;t t
+s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%;t t
+s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%;t t
+s%@INSTALL_DATA@%$INSTALL_DATA%;t t
+s%@PACKAGE@%$PACKAGE%;t t
+s%@VERSION@%$VERSION%;t t
+s%@ACLOCAL@%$ACLOCAL%;t t
+s%@AUTOCONF@%$AUTOCONF%;t t
+s%@AUTOMAKE@%$AUTOMAKE%;t t
+s%@AUTOHEADER@%$AUTOHEADER%;t t
+s%@MAKEINFO@%$MAKEINFO%;t t
+s%@AMTAR@%$AMTAR%;t t
+s%@install_sh@%$install_sh%;t t
+s%@AWK@%$AWK%;t t
+s%@SET_MAKE@%$SET_MAKE%;t t
+s%@AMDEP@%$AMDEP%;t t
+s%@AMDEPBACKSLASH@%$AMDEPBACKSLASH%;t t
+s%@DEPDIR@%$DEPDIR%;t t
+s%@MAINTAINER_MODE_TRUE@%$MAINTAINER_MODE_TRUE%;t t
+s%@MAINTAINER_MODE_FALSE@%$MAINTAINER_MODE_FALSE%;t t
+s%@MAINT@%$MAINT%;t t
+s%@WANT_MPBSD_TRUE@%$WANT_MPBSD_TRUE%;t t
+s%@WANT_MPBSD_FALSE@%$WANT_MPBSD_FALSE%;t t
+s%@WANT_MPFR_TRUE@%$WANT_MPFR_TRUE%;t t
+s%@WANT_MPFR_FALSE@%$WANT_MPFR_FALSE%;t t
+s%@CC@%$CC%;t t
+s%@CCAS@%$CCAS%;t t
+s%@CPP@%$CPP%;t t
+s%@LN_S@%$LN_S%;t t
+s%@M4@%$M4%;t t
+s%@AR@%$AR%;t t
+s%@CALLING_CONVENTIONS_OBJS@%$CALLING_CONVENTIONS_OBJS%;t t
+s%@SPEED_CYCLECOUNTER_OBJS@%$SPEED_CYCLECOUNTER_OBJS%;t t
+s%@EXEEXT@%$EXEEXT%;t t
+s%@OBJEXT@%$OBJEXT%;t t
+s%@RANLIB@%$RANLIB%;t t
+s%@STRIP@%$STRIP%;t t
+s%@LIBTOOL@%$LIBTOOL%;t t
+s%@U@%$U%;t t
+s%@ANSI2KNR@%$ANSI2KNR%;t t
+s%@mpn_objects@%$mpn_objects%;t t
+s%@mpn_objs_in_libgmp@%$mpn_objs_in_libgmp%;t t
+s%@gmp_srclinks@%$gmp_srclinks%;t t
+CEOF
+
+EOF
+
+ cat >>$CONFIG_STATUS <<\EOF
+ # Split the substitutions into bite-sized pieces for seds with
+ # small command number limits, like on Digital OSF/1 and HP-UX.
+ ac_max_sed_lines=48
+ ac_sed_frag=1 # Number of current file.
+ ac_beg=1 # First line for current file.
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
+ ac_more_lines=:
+ ac_sed_cmds=""
+ while $ac_more_lines; do
+ if test $ac_beg -gt 1; then
+ sed "1,${ac_beg}d; ${ac_end}q" $ac_cs_root.subs >$ac_cs_root.sfrag
+ else
+ sed "${ac_end}q" $ac_cs_root.subs >$ac_cs_root.sfrag
+ fi
+ if test ! -s $ac_cs_root.sfrag; then
+ ac_more_lines=false
+ rm -f $ac_cs_root.sfrag
+ else
+ # The purpose of the label and of the branching condition is to
+ # speed up the sed processing (if there are no `@' at all, there
+ # is no need to browse any of the substitutions).
+ # These are the two extra sed commands mentioned above.
+ (echo ':t
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $ac_cs_root.sfrag) >$ac_cs_root.s$ac_sed_frag
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds="sed -f $ac_cs_root.s$ac_sed_frag"
+ else
+ ac_sed_cmds="$ac_sed_cmds | sed -f $ac_cs_root.s$ac_sed_frag"
+ fi
+ ac_sed_frag=`expr $ac_sed_frag + 1`
+ ac_beg=$ac_end
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
+ fi
+ done
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds=cat
+ fi
+fi # test -n "$CONFIG_FILES"
+
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case "$ac_file" in
+ *:*) ac_file_in=`echo "$ac_file" | sed 's%[^:]*:%%'`
+ ac_file=`echo "$ac_file" | sed 's%:.*%%'` ;;
+ *) ac_file_in="${ac_file}.in" ;;
+ esac
+
+ # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories.
+
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dir=`echo "$ac_file" | sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
+ # The file is in a subdirectory.
+ test ! -d "$ac_dir" && mkdir "$ac_dir"
+ ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`"
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_dots=`echo "$ac_dir_suffix" | sed 's%/[^/]*%../%g'`
+ else
+ ac_dir_suffix= ac_dots=
+ fi
+
+ case "$ac_given_srcdir" in
+ .) srcdir=.
+ if test -z "$ac_dots"; then top_srcdir=.
+ else top_srcdir=`echo $ac_dots | sed 's%/$%%'`; fi ;;
+ [\\/]* | ?:[\\/]* )
+ srcdir="$ac_given_srcdir$ac_dir_suffix";
+ top_srcdir=$ac_given_srcdir ;;
+ *) # Relative path.
+ srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix"
+ top_srcdir="$ac_dots$ac_given_srcdir" ;;
+ esac
+
+ case "$ac_given_INSTALL" in
+ [\\/$]* | ?:[\\/]* ) INSTALL="$ac_given_INSTALL" ;;
+ *) INSTALL="$ac_dots$ac_given_INSTALL" ;;
+ esac
+
+ echo creating "$ac_file"
+ rm -f "$ac_file"
+ configure_input="Generated automatically from `echo $ac_file_in |
+ sed 's%.*/%%'` by configure."
+ case "$ac_file" in
+ *[Mm]akefile*) ac_comsub="1i\\
+# $configure_input" ;;
+ *) ac_comsub= ;;
+ esac
+
+ # Don't redirect the output to AC_FILE directly: use `mv' so that updating
+ # is atomic, and doesn't need trapping.
+ ac_file_inputs=`echo "$ac_file_in" |
+ sed -e "s%:% $ac_given_srcdir/%g;s%^%$ac_given_srcdir/%"`
+ for ac_file_input in $ac_file_inputs;
+ do
+ test -f "$ac_file_input" ||
+ { echo "configure: error: cannot find input file \`$ac_file_input'" 1>&2; exit 1; }
+ done
+EOF
+cat >>$CONFIG_STATUS <<EOF
+ sed -e "$ac_comsub
+$ac_vpsub
+$extrasub
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s%@configure_input@%$configure_input%;t t
+s%@srcdir@%$srcdir%;t t
+s%@top_srcdir@%$top_srcdir%;t t
+s%@INSTALL@%$INSTALL%;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$ac_cs_root.out
+ mv $ac_cs_root.out $ac_file
+
+fi; done
+rm -f $ac_cs_root.s*
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+
+#
+# CONFIG_HEADER section.
+#
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='[ ].*$%\1#\2'
+ac_dC=' '
+ac_dD='%;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='$%\1#\2define\3'
+ac_uC=' '
+ac_uD='%;t'
+
+for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case "$ac_file" in
+ *:*) ac_file_in=`echo "$ac_file" | sed 's%[^:]*:%%'`
+ ac_file=`echo "$ac_file" | sed 's%:.*%%'` ;;
+ *) ac_file_in="${ac_file}.in" ;;
+ esac
+
+ echo creating $ac_file
+
+ rm -f $ac_cs_root.frag $ac_cs_root.in $ac_cs_root.out
+ ac_file_inputs=`echo "$ac_file_in" |
+ sed -e "s%:% $ac_given_srcdir/%g;s%^%$ac_given_srcdir/%"`
+ for ac_file_input in $ac_file_inputs;
+ do
+ test -f "$ac_file_input" ||
+ { echo "configure: error: cannot find input file \`$ac_file_input'" 1>&2; exit 1; }
+ done
+ # Remove the trailing spaces.
+ sed -e 's/[ ]*$//' $ac_file_inputs >$ac_cs_root.in
+
+EOF
+
+# Transform confdefs.h into two sed scripts, `conftest.defines' and
+# `conftest.undefs', that substitutes the proper values into
+# config.h.in to produce config.h. The first handles `#define'
+# templates, and the second `#undef' templates.
+# And first: Protect against being on the right side of a sed subst in
+# config.status. Protect against being in an unquoted here document
+# in config.status.
+rm -f conftest.defines conftest.undefs
+ac_cs_root=conftest
+cat >$ac_cs_root.hdr <<\EOF
+s/[\\&%]/\\&/g
+s%[\\$`]%\\&%g
+t clear
+: clear
+s%^[ ]*#[ ]*define[ ][ ]*\(\([^ (][^ (]*\)([^)]*)\)[ ]*\(.*\)$%${ac_dA}\2${ac_dB}\1${ac_dC}\3${ac_dD}%gp
+t cleanup
+s%^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp
+: cleanup
+EOF
+# If some macros were called several times there might be several times
+# the same #defines, which is useless. Nevertheless, we may not want to
+# sort them, since we want the *last* AC_DEFINE to be honored.
+uniq confdefs.h | sed -n -f $ac_cs_root.hdr >conftest.defines
+sed -e 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
+rm -f $ac_cs_root.hdr
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >>conftest.undefs <<\EOF
+s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */%
+EOF
+
+# Break up conftest.defines because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
+echo ' if egrep "^[ ]*#[ ]*define" $ac_cs_root.in >/dev/null; then' >>$CONFIG_STATUS
+echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
+echo ' :' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.defines >/dev/null
+do
+ # Write a limited-size here document to $ac_cs_root.frag.
+ echo ' cat >$ac_cs_root.frag <<CEOF' >>$CONFIG_STATUS
+ echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $ac_cs_root.frag $ac_cs_root.in >$ac_cs_root.out
+ rm -f $ac_cs_root.in
+ mv $ac_cs_root.out $ac_cs_root.in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines
+echo ' fi # egrep' >>$CONFIG_STATUS
+echo >>$CONFIG_STATUS
+
+# Break up conftest.undefs because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.undefs >/dev/null
+do
+ # Write a limited-size here document to $ac_cs_root.frag.
+ echo ' cat >$ac_cs_root.frag <<CEOF' >>$CONFIG_STATUS
+ echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $ac_cs_root.frag $ac_cs_root.in >$ac_cs_root.out
+ rm -f $ac_cs_root.in
+ mv $ac_cs_root.out $ac_cs_root.in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
+ rm -f conftest.undefs
+ mv conftest.tail conftest.undefs
+done
+rm -f conftest.undefs
+
+cat >>$CONFIG_STATUS <<\EOF
+ rm -f $ac_cs_root.frag $ac_cs_root.h
+ echo "/* $ac_file. Generated automatically by configure. */" >$ac_cs_root.h
+ cat $ac_cs_root.in >>$ac_cs_root.h
+ rm -f $ac_cs_root.in
+ if cmp -s $ac_file $ac_cs_root.h 2>/dev/null; then
+ echo "$ac_file is unchanged"
+ rm -f $ac_cs_root.h
+ else
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dir=`echo "$ac_file" | sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
+ # The file is in a subdirectory.
+ test ! -d "$ac_dir" && mkdir "$ac_dir"
+ fi
+ rm -f $ac_file
+ mv $ac_cs_root.h $ac_file
+ fi
+fi; done
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+
+#
+# CONFIG_LINKS section.
+#
+srcdir=$ac_given_srcdir
+
+for ac_file in : $CONFIG_LINKS; do if test "x$ac_file" != x:; then
+ ac_dest=`echo "$ac_file" | sed 's%:.*%%'`
+ ac_source=`echo "$ac_file" | sed 's%[^:]*:%%'`
+
+ echo "copying $srcdir/$ac_source to $ac_dest"
+
+ if test ! -r $srcdir/$ac_source; then
+ { echo "configure: error: $srcdir/$ac_source: File not found" 1>&2; exit 1; }
+ fi
+ rm -f $ac_dest
+
+ # Make relative symlinks.
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dest_dir=`echo $ac_dest | sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dest_dir" != "$ac_dest" && test "$ac_dest_dir" != .; then
+ # The dest file is in a subdirectory.
+ test ! -d "$ac_dest_dir" && mkdir "$ac_dest_dir"
+ ac_dest_dir_suffix="/`echo $ac_dest_dir|sed 's%^\./%%'`"
+ # A "../" for each directory in $ac_dest_dir_suffix.
+ ac_dots=`echo $ac_dest_dir_suffix|sed 's%/[^/]*%../%g'`
+ else
+ ac_dest_dir_suffix= ac_dots=
+ fi
+
+ case "$srcdir" in
+ [\\/$]* | ?:[\\/]* ) ac_rel_source="$srcdir/$ac_source" ;;
+ *) ac_rel_source="$ac_dots$srcdir/$ac_source" ;;
+ esac
+
+ # Note: Dodgy local mods to 'make things work' in an environment (cygwin)
+ # that supports symlinks (through silly hack) using tools that don't
+ # understand them (mingw). The end sometimes justifies the means, son.
+ #
+ # Make a symlink if possible; otherwise try a hard link.
+ #if ln -s $ac_rel_source $ac_dest 2>/dev/null ||
+ # ln $srcdir/$ac_source $ac_dest; then :
+ #
+ # Note: If the -p offends your 'cp', just drop it; no harm done, you'll just
+ # get more recompilations.
+ #
+ if cp -p $srcdir/$ac_source $ac_dest; then :
+ else
+ { echo "configure: error: cannot copy $ac_dest to $srcdir/$ac_source" 1>&2; exit 1; }
+ fi
+fi; done
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+
+#
+# CONFIG_COMMANDS section.
+#
+for ac_file in .. $CONFIG_COMMANDS; do if test "x$ac_file" != x..; then
+ ac_dest=`echo "$ac_file" | sed 's%:.*%%'`
+ ac_source=`echo "$ac_file" | sed 's%[^:]*:%%'`
+
+ case "$ac_dest" in
+ default-1 ) test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h ;;
+ esac
+fi;done
+EOF
+
+cat >>$CONFIG_STATUS <<\EOF
+
+exit 0
+EOF
+chmod +x $CONFIG_STATUS
+rm -fr confdefs* $ac_clean_files
+trap 'exit 1' 1 2 15
+
+test "$no_create" = yes || $SHELL $CONFIG_STATUS || exit 1
diff --git a/rts/gmp/configure.in b/rts/gmp/configure.in
new file mode 100644
index 0000000000..18f610fe29
--- /dev/null
+++ b/rts/gmp/configure.in
@@ -0,0 +1,950 @@
+dnl Process this file with autoconf to produce a configure script.
+
+
+dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+AC_REVISION($Revision: 1.8 $)dnl
+AC_PREREQ(2.14)dnl
+AC_INIT(gmp-impl.h)
+
+dnl Check system.
+AC_CANONICAL_SYSTEM
+
+dnl Automake
+AM_INIT_AUTOMAKE(gmp, GMP_VERSION)
+AM_CONFIG_HEADER(config.h:config.in)
+AM_MAINTAINER_MODE
+
+dnl GMP specific
+GMP_INIT(config.m4)
+
+
+AC_ARG_ENABLE(assert,
+AC_HELP_STRING([--enable-assert],[enable ASSERT checking [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-assert, need yes or no]) ;;
+esac],
+[enable_assert=no])
+
+if test "$enable_assert" = "yes"; then
+ AC_DEFINE(WANT_ASSERT,1,
+ [./configure --enable-assert option, to enable some ASSERT()s])
+fi
+
+
+AC_ARG_ENABLE(alloca,
+AC_HELP_STRING([--enable-alloca],[use alloca for temp space [default=yes]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-alloca, need yes or no]) ;;
+esac],
+[enable_alloca=yes])
+
+if test "$enable_alloca" = "no"; then
+ AC_DEFINE(USE_STACK_ALLOC,1,
+ [./configure --disable-alloca option, to use stack-alloc.c, not alloca])
+fi
+
+
+AC_ARG_ENABLE(fft,
+AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-fft, need yes or no]) ;;
+esac],
+[enable_fft=no])
+
+if test "$enable_fft" = "yes"; then
+ AC_DEFINE(WANT_FFT,1,
+ [./configure --enable-fft option, to enable FFTs for multiplication])
+fi
+
+
+AC_ARG_ENABLE(mpbsd,
+AC_HELP_STRING([--enable-mpbsd],[build Berkley MP compatibility library [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-mpbsd, need yes or no]) ;;
+esac],
+[enable_mpbsd=no])
+AM_CONDITIONAL(WANT_MPBSD, test "$enable_mpbsd" = "yes")
+
+
+AC_ARG_ENABLE(mpfr,
+AC_HELP_STRING([--enable-mpfr],[build MPFR [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-mpfr, need yes or no]) ;;
+esac],
+[enable_mpfr=no])
+AM_CONDITIONAL(WANT_MPFR, test "$enable_mpfr" = "yes")
+
+
+dnl Switch on OS and determine what compiler to use.
+dnl
+dnl os_64bit Set to "yes" if OS is 64-bit capable.
+dnl FIXME: Rename to `check_64bit_compiler'!
+dnl cclist List of compilers, best first.
+dnl gmp_cflags_{cc} Flags for compiler named {cc}.
+dnl gmp_cflags64_{cc} Flags for compiler named {cc} for 64-bit code.
+dnl gmp_optcflags_{cc} Optional compiler flags.
+dnl gmp_xoptcflags_{cc} Exclusive optional compiler flags.
+dnl
+os_64bit="no"
+cclist="gcc cc" # FIXME: Prefer c89 to cc.
+gmp_cflags_gcc="-g -O2"
+gmp_cflags64_gcc="-g -O2"
+gmp_cflags_cc="-g"
+gmp_cflags64_cc="-g"
+
+case "$target" in
+ # Alpha
+ alpha*-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ gmp_cflags_cc="$gmp_cflags_cc -O"
+ ;;
+ alpha*-*-osf*)
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in # compilers don't seem to understand `ev67' and such.
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ # FIXME: We shouldn't fail fatally if none of these work, but that's
+ # how xoptcflags work and we don't have any other mechanism right now.
+ # Why do we need this here and not for alpha*-*-* below?
+ gmp_xoptcflags_gcc="-Wa,-arch,${flavour} -Wa,-m${flavour}"
+ gmp_optcflags_cc="-arch $flavour -tune $flavour"
+ fi
+ ;;
+ alpha*-*-*)
+ cclist="gcc"
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ fi
+ ;;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ # Don't inherit default gmp_cflags_cc value; it comes with -g which
+ # disables all optimization on Cray vector systems
+ gmp_cflags_cc="-O"
+ ;;
+
+ # AMD and Intel x86 configurations
+ [i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*])
+ # Rumour has it -O2 used to give worse register allocation than just -O.
+ gmp_cflags_gcc="-g -O -fomit-frame-pointer"
+
+ case "${target}" in
+ i386*-*-*) gmp_optcflags_gcc="-mcpu=i386 -march=i386";;
+ i486*-*-*) gmp_optcflags_gcc="-mcpu=i486 -march=i486";;
+ i586*-*-* | pentium-*-* | pentiummmx-*-*)
+ gmp_optcflags_gcc="-mcpu=pentium -march=pentium";;
+
+ # -march=pentiumpro not used because mpz/powm.c (swox cvs rev 1.4)
+ # tickles a bug in gcc 2.95.2 (believed fixed in 2.96).
+ [i686*-*-* | pentiumpro-*-* | pentium[23]-*-*])
+ gmp_optcflags_gcc="-mcpu=pentiumpro";;
+
+ k6*-*-*) gmp_optcflags_gcc="-mcpu=k6 -march=k6";;
+
+ # Athlon instruction costs are close to p6: 3 cycle load latency, 4-6
+ # cycle mul, 40 cycle div, pairable adc, ...
+ # FIXME: Change this when gcc gets something specific for Athlon.
+ # -march=pentiumpro not used, per i686 above.
+ athlon-*-*) gmp_optcflags_gcc="-mcpu=pentiumpro";;
+ esac
+ ;;
+
+ # Sparc
+ [ultrasparc*-*-solaris2.[7-9] | sparcv9-*-solaris2.[7-9]])
+ os_64bit=yes
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ gmp_cflags64_cc="-xtarget=native -xarch=v9 -xO4"
+ ;;
+ sparc64-*-linux*)
+ # Need to think more about the options passed here. This isn't good for
+ # some sparc64 linux distros, since we end up not optimizing when all the
+ # options below fail.
+ os_64bit=yes
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_gcc="$gmp_cflags_gcc -m32"
+ gmp_xoptflags_gcc="-mcpu=ultrasparc -mvis"
+ ;;
+ ultrasparc*-*-* | sparcv9-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-solaris2.* | microsparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-* | microsparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92"
+ gmp_cflags_cc="-O2" # FIXME: Flag for v8?
+ ;;
+ supersparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4 -DSUPERSPARC"
+ ;;
+ supersparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92 -DSUPERSPARC"
+ gmp_cflags_cc="-O2 -DSUPERSPARC" # FIXME: Flag for v8?
+ ;;
+ *sparc*-*-*)
+ cclist="gcc acc cc"
+ gmp_cflags_acc="-g -O2"
+ gmp_cflags_cc="-g -O2"
+ ;;
+
+ # POWER/PowerPC
+ powerpc64-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -maix64 -mpowerpc64"
+ gmp_cflags_xlc="-g -O2 -q64 -qtune=pwr3"
+ ;;
+ powerpc*-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=ppc -O2"
+ ;;
+ power-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpower"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=pwr -O2"
+ ;;
+ powerpc64*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc64"
+ AC_DEFINE(_LONG_LONG_LIMB) dnl FIXME: Remove.
+ ;;
+ powerpc-apple-darwin* | powerpc-apple-macosx*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc -traditional-cpp"
+ ;;
+ powerpc*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ ;;
+
+ # MIPS
+ mips-sgi-irix6.*)
+ os_64bit=yes
+ gmp_cflags64_gcc="-g -O2 -mabi=n32"
+ gmp_cflags64_cc="$gmp_cflags64_cc -O2 -n32"
+ ;;
+
+ # Motorola 68k family
+ m88110*-*-*)
+ gmp_cflags_gcc="-g -O -m88110" dnl FIXME: Use `-O2'?
+ ;;
+ m68*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # HP
+ hppa1.0*-*-*)
+ cclist="gcc c89 cc"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+ hppa2.0w*-*-*)
+ cclist="c89 cc"
+ gmp_cflags_c89="+DD64 +O3"
+ gmp_cflags_cc="+DD64 +O3"
+ ;;
+ hppa2.0*-*-*)
+ os_64bit=yes
+ cclist="gcc c89 cc"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -mWHAT -D_LONG_LONG_LIMB"
+ # +O2 to cc triggers bug in mpz/powm.c (1.4)
+ gmp_cflags64_c89="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags64_cc="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+
+ # VAX
+ vax*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # Fujitsu
+ [f30[01]-fujitsu-sysv*])
+ cclist="gcc vcc"
+ gmp_cflags_vcc="-g" # FIXME: flags for vcc?
+ ;;
+esac
+
+case "${target}" in
+ *-*-mingw32) gmp_cflags_gcc="$gmp_cflags_gcc -mno-cygwin";;
+esac
+
+dnl Check for programs needed by macros for finding compiler.
+dnl More programs are checked for below, when a compiler is found.
+AC_PROG_NM dnl Macro from Libtool.
+# nm on 64-bit AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ NM="$NM -X 64"
+ ;;
+esac
+
+# Save CFLAGS given on command line.
+gmp_user_CFLAGS="$CFLAGS"
+
+if test -z "$CC"; then
+ # Find compiler.
+ GMP_PROG_CC_FIND($cclist, $os_64bit)
+
+ # If 64-bit OS and we have a 64-bit compiler, use it.
+ if test -n "$os_64bit" && test -n "$CC64"; then
+ CC=$CC64
+ CFLAGS=$CFLAGS64
+ else
+ eval CFLAGS=\$gmp_cflags_$CC
+ fi
+
+ # Try compiler flags that may work with only some compiler versions.
+ # gmp_optcflags: All or nothing.
+ eval optcflags=\$gmp_optcflags_$CC
+ if test -n "$optcflags"; then
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $optcflags"
+ AC_MSG_CHECKING([whether $CC accepts $optcflags])
+ AC_LANG_C
+ AC_TRY_COMPILER([int main(){return(0);}], optok, cross)
+ if test "$optok" = "yes"; then
+ AC_MSG_RESULT([yes])
+ else
+ AC_MSG_RESULT([no])
+ CFLAGS="$CFLAGS_save"
+ fi
+ fi
+ # gmp_xoptcflags: First is best, one has to work.
+ eval xoptcflags=\$gmp_xoptcflags_$CC
+ if test -n "$xoptcflags"; then
+ gmp_found="no"
+ for xopt in $xoptcflags; do
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $xopt"
+ AC_MSG_CHECKING([whether $CC accepts $xopt])
+ AC_LANG_C
+ AC_TRY_COMPILER([int main(){return(0);}], optok, cross)
+ if test "$optok" = "yes"; then
+ AC_MSG_RESULT([yes])
+ gmp_found="yes"
+ break
+ else
+ AC_MSG_RESULT([no])
+ CFLAGS="$CFLAGS_save"
+ fi
+ done
+ if test "$gmp_found" = "no"; then
+ echo ["$0: fatal: need a compiler that understands one of $xoptcflags"]
+ exit 1
+ fi
+ fi
+fi
+
+# Restore CFLAGS given on command line.
+# FIXME: We've run through quite some unnecessary code looking for a
+# nice compiler and working flags for it, just to spoil that with user
+# supplied flags.
+test -n "$gmp_user_CFLAGS" && CFLAGS="$gmp_user_CFLAGS"
+
+# Select chosen compiler.
+GMP_PROG_CC_SELECT
+
+# How to assemble.
+CCAS="$CC -c"
+AC_SUBST(CCAS)
+
+dnl Checks for programs.
+dnl --------------------
+AC_PROG_CPP
+AC_PROG_INSTALL
+AC_PROG_LN_S
+GMP_PROG_M4
+AC_CHECK_PROG(AR, ar, ar)
+# ar on AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ AR="$AR -X 64"
+ ;;
+esac
+dnl FIXME: Find good ld? /usr/ucb/ld on Solaris won't work.
+
+dnl Checks for assembly syntax.
+if test "$gmp_no_asm_syntax_testing" != "yes"; then
+ GMP_CHECK_ASM_TEXT
+ GMP_CHECK_ASM_DATA
+ GMP_CHECK_ASM_GLOBL
+ GMP_CHECK_ASM_LABEL_SUFFIX
+ GMP_CHECK_ASM_TYPE
+ GMP_CHECK_ASM_SIZE
+ GMP_CHECK_ASM_LSYM_PREFIX
+ GMP_CHECK_ASM_W32
+ GMP_CHECK_ASM_UNDERSCORE(underscore=yes, underscore=no)
+ GMP_CHECK_ASM_ALIGN_LOG(asm_align=log, asm_align=nolog)
+fi
+
+dnl FIXME: Check for FPU and set `floating_point' appropriately.
+
+dnl ========================================
+dnl Configuring mpn.
+dnl ----------------------------------------
+dnl Set the following target specific variables:
+dnl path where to search for source files
+dnl family processor family (Needed for building
+dnl asm-syntax.h for now. FIXME: Remove.)
+dnl extra_functions extra functions
+
+family=generic
+
+case ${target} in
+ arm*-*-*)
+ path="arm"
+ ;;
+ [sparcv9*-*-solaris2.[789]* | sparc64*-*-solaris2.[789]* | ultrasparc*-*-solaris2.[789]*])
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparc64-*-linux*)
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparcv8*-*-* | microsparc*-*-*)
+ path="sparc32/v8 sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-*)
+ path="sparc32/v9 sparc32/v8 sparc32"
+ extra_functions="udiv_fp"
+ ;;
+ supersparc*-*-*)
+ path="sparc32/v8/supersparc sparc32/v8 sparc32"
+ extra_functions="udiv"
+ ;;
+ sparc*-*-*) path="sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ hppa7000*-*-*)
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa1.0*-*-*)
+ path="hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa2.0w-*-*)
+ path="pa64w"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ ;;
+ hppa2.0*-*-*)
+ if test -n "$CC64"; then
+ path="pa64"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ # We need to use the system compiler, or actually the system assembler,
+ # since GAS has not been ported to understand the 2.0 instructions.
+ CCAS="$CC64 -c"
+ else
+ # FIXME: path should be "hppa/hppa2_0 hppa/hppa1_1 hppa"
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ fi
+ ;;
+ hppa*-*-*) #assume pa7100
+ path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd";;
+ [f30[01]-fujitsu-sysv*])
+ path=fujitsu;;
+ alphaev6*-*-*) path="alpha/ev6 alpha"; extra_functions="invert_limb cntlz";;
+ alphaev5*-*-*) path="alpha/ev5 alpha"; extra_functions="invert_limb cntlz";;
+ alpha*-*-*) path="alpha"; extra_functions="invert_limb cntlz";;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ path="cray"
+ extra_functions="mulww";;
+ am29000*-*-*) path="a29k";;
+ a29k*-*-*) path="a29k";;
+
+ # AMD and Intel x86 configurations
+
+ [i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*])
+ gmp_m4postinc="x86/x86-defs.m4"
+ extra_functions="udiv umul"
+ CALLING_CONVENTIONS_OBJS="x86call.o x86check.o"
+
+ GMP_CHECK_ASM_SHLDL_CL(
+ [GMP_DEFINE(WANT_SHLDL_CL,1)],
+ [GMP_DEFINE(WANT_SHLDL_CL,0)])
+ GMP_CHECK_ASM_ALIGN_FILL_0x90
+
+ # the CPUs below wanting to know about mmx
+ case ${target} in
+ [pentiummmx-*-* | pentium[23]-*-* | k6*-*-* | athlon-*-*])
+ GMP_CHECK_ASM_MMX(tmp_mmx=yes, tmp_mmx=no)
+ ;;
+ esac
+
+ # default for anything not otherwise mentioned
+ path="x86"
+
+ case ${target} in
+ [i[34]86*-*-*])
+ path="x86"
+ ;;
+ k5*-*-*)
+ # don't know what best suits k5
+ path="x86"
+ ;;
+ i586*-*-* | pentium-*-*)
+ path="x86/pentium x86"
+ ;;
+ pentiummmx-*-*)
+ path="x86/pentium x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/pentium/mmx $path"
+ fi
+ ;;
+ i686*-*-* | pentiumpro-*-*)
+ path="x86/p6 x86"
+ ;;
+ pentium2-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ pentium3-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/p3mmx x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ [k6[23]*-*-*])
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/k62mmx x86/k6/mmx $path"
+ fi
+ ;;
+ k6*-*-*)
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/mmx $path"
+ fi
+ ;;
+ athlon-*-*)
+ path="x86/k7 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k7/mmx $path"
+ fi
+ ;;
+ esac
+ ;;
+
+
+ i960*-*-*) path="i960";;
+
+ ia64*-*-*) path="ia64";;
+
+# Motorola 68k configurations. Let m68k mean 68020-68040.
+ [m680[234]0*-*-* | m68k*-*-* | \
+ m68*-next-nextstep*]) # Nexts are at least '020
+ path="m68k/mc68020 m68k"
+ family=m68k
+ ;;
+ m68000*-*-*)
+ path="m68k"
+ family=m68k
+ ;;
+
+ m88k*-*-* | m88k*-*-*) path="m88k";;
+ m88110*-*-*) path="m88k/mc88110 m88k";;
+ ns32k*-*-*) path="ns32k";;
+
+ pyramid-*-*) path="pyr";;
+
+ ppc601-*-*) path="power powerpc32";;
+ powerpc64*-*-*) path="powerpc64";;
+ powerpc*-*-*) path="powerpc32";;
+ rs6000-*-* | power-*-* | power2-*-*)
+ path="power"
+ extra_functions="udiv_w_sdiv"
+ ;;
+
+ sh-*-*) path="sh";;
+ sh2-*-*) path="sh/sh2 sh";;
+
+ [mips[34]*-*-*]) path="mips3";;
+ mips*-*-irix6*) path="mips3";;
+ mips*-*-*) path="mips2";;
+
+ vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv";;
+
+ z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv";;
+ z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv";;
+
+ clipper*-*-*) path="clipper";;
+esac
+
+AC_SUBST(CALLING_CONVENTIONS_OBJS)
+if test -n "$CALLING_CONVENTIONS_OBJS"; then
+ AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
+ [Define if mpn/tests has calling conventions checking for the CPU])
+fi
+
+
+case ${target} in
+ [i[5-8]86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*])
+ # rdtsc is in pentium and up, not in i386 and i486
+ SPEED_CYCLECOUNTER_OBJS=pentium.lo
+ ;;
+ alpha*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=alpha.lo
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-* | sparc64*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=sparcv9.lo
+ ;;
+ hppa2*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa2.lo
+ ;;
+ hppa*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa.lo
+ ;;
+esac
+
+AC_SUBST(SPEED_CYCLECOUNTER_OBJS)
+
+if test -n "$SPEED_CYCLECOUNTER_OBJS"
+then
+ AC_DEFINE(HAVE_SPEED_CYCLECOUNTER, 1,
+ [Define if a speed_cyclecounter exists (for the tune programs)])
+fi
+
+
+dnl Extensions for executable and object files.
+dnl -------------------------------------------
+AC_EXEEXT
+AC_OBJEXT
+
+dnl Use Libtool.
+dnl ------------
+dnl FIXME: Shared libs seem to fail on aix4.3.
+dnl FIXME: Should invoke [AC_DISABLE_SHARED], but m4 recurses to death.
+case "$target" in
+ [*-*-aix4.[3-9]*]) enable_shared=no ;;
+esac
+AC_PROG_LIBTOOL
+
+dnl Checks for libraries.
+dnl ---------------------
+AC_CHECK_DECLS((optarg))
+
+dnl Checks for header files.
+dnl ------------------------
+AC_HEADER_STDC
+AC_CHECK_HEADERS(getopt.h unistd.h sys/sysctl.h sys/time.h)
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+dnl --------------------------------------------------------------
+AC_CHECK_TYPES((void))
+AC_C_STRINGIZE
+
+dnl Checks for library functions.
+dnl -----------------------------
+dnl Most of these are only for the benefit of supplementary programs. The
+dnl library itself doesn't use anything weird.
+dnl AC_FUNC_MEMCMP
+dnl AC_TYPE_SIGNAL
+dnl AC_CHECK_FUNCS(strtol)
+AC_CHECK_FUNCS(getopt_long getpagesize popen processor_info strtoul sysconf sysctlbyname)
+
+dnl Trick automake into thinking we've run AM_C_PROTOTYPES which it wants
+dnl for ansi2knr, and instead use our own test. (It's only a warning
+dnl automake prints, but it's good to suppress it.)
+ifelse(0,1,[
+AM_C_PROTOTYPES
+])
+GMP_C_ANSI2KNR
+
+
+dnl Set `syntax' to one of <blank>, "mit", "elf", "aix", "macho".
+syntax=
+# For now, we use the old switch for setting syntax.
+# FIXME: Remove when conversion to .asm is completed.
+changequote(,)dnl
+case "${target}" in
+ m680[234]0*-*-linuxaout* | m68k*-*-linuxaout* | \
+ m68k-next-nextstep* | \
+ m68000*-*-*)
+ syntax=mit
+ ;;
+ m680[234]0*-*-linux* | m68k*-*-linux*)
+ syntax=elf
+ ;;
+ m680[234]0*-*-* | m68k*-*-*)
+ syntax=mit
+ ;;
+esac
+changequote([,])dnl
+
+dnl ----------------------------------------
+# Now build an asm-syntax.h file for targets that include that from the
+# assembly files.
+# FIXME: Remove when conversion to .asm is completed.
+case "${family}-${underscore}-${asm_align}-${syntax}" in
+ m68k-yes-log-mit)
+ echo '#define MIT_SYNTAX' >asm-syntax.h
+ cat $srcdir/mpn/underscore.h >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+ m68k-no-nolog-elf)
+ echo '#define ELF_SYNTAX' >asm-syntax.h
+ echo '#define C_SYMBOL_NAME(name) name' >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+esac
+
+
+# The pattern here tests for an absolute path the same way as
+# _AC_OUTPUT_FILES in autoconf acgeneral.m4.
+GMP_DEFINE_RAW(["dnl CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir"])
+case "$srcdir" in
+[[\\/]]* | ?:[[\\/]]* )
+ GMP_DEFINE_RAW(["define(<CONFIG_TOP_SRCDIR>,<\`$srcdir'>)"]) ;;
+*) GMP_DEFINE_RAW(["define(<CONFIG_TOP_SRCDIR>,<\`../$srcdir'>)"]) ;;
+esac
+
+GMP_DEFINE_RAW(["include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')"], POST)
+
+# Must be after asm-defs.m4
+GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_TARGET_CPU_$target_cpu')", POST)
+
+
+dnl config.m4 post-includes
+dnl -----------------------
+dnl (Note x86 post include set with $path above.)
+changequote(,)dnl
+case "$target" in
+ alpha*-cray-unicos*)
+ gmp_m4postinc="alpha/unicos.m4"
+ ;;
+ alpha*-*-*)
+ gmp_m4postinc="alpha/default.m4"
+ ;;
+ power*-*-*)
+ case "$target" in
+ *-*-mach* | *-*-rhapsody* | *-*-nextstep* | *-*-darwin* | *-*-macosx*)
+ ;; # these use non-conventional assembly syntax.
+ powerpc64-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc64/aix.m4"
+ ;;
+ *-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc32/aix.m4"
+ ;;
+ *)
+ gmp_m4postinc="powerpc32/regmap.m4"
+ ;;
+ esac
+ ;;
+esac
+changequote([, ])dnl
+
+for tmp_f in $gmp_m4postinc; do
+ GMP_DEFINE_RAW(["include_mpn(\`$tmp_f')"], POST)
+done
+
+
+# Set up `gmp_links'. It's a list of link:file pairs that configure will
+# process to create link -> file.
+gmp_links=
+
+# If the user specified `MPN_PATH', use that instead of the path we've
+# come up with.
+if test -z "$MPN_PATH"; then
+ path="$path generic"
+else
+ path="$MPN_PATH"
+fi
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension. Certain multi-function files with special names
+# can provide some functions too. (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# FIXME: udiv and umul aren't in $gmp_mpn_functions_optional yet since
+# there's some versions of those files which should be checked for bit
+# rot first. Put them in $extra_functions for each target for now,
+# change to standard optionals when all are ready.
+
+# Note: The following lines defining $gmp_mpn_functions_optional
+# and $gmp_mpn_functions are parsed by the "macos/configure"
+# Perl script. So if you change the lines in a major way
+# make sure to run and examine the output from
+#
+# % (cd macos; perl configure)
+
+gmp_mpn_functions_optional="copyi copyd com_n \
+ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n"
+
+gmp_mpn_functions="${extra_functions} inlines add_n sub_n mul_1 addmul_1 \
+ submul_1 lshift rshift diveby3 divrem divrem_1 divrem_2 \
+ mod_1 mod_1_rs pre_mod_1 dump \
+ mul mul_fft mul_n mul_basecase sqr_basecase random \
+ random2 sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \
+ bdivmod gcd_1 gcd gcdext tdiv_qr bz_divrem_n sb_divrem_mn jacbase \
+ $gmp_mpn_functions_optional"
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp="mpn/mp_bases.lo"
+
+for tmp_fn in ${gmp_mpn_functions} ; do
+ [rm -f mpn/${tmp_fn}.[Ssc] mpn/${tmp_fn}.asm]
+
+ # functions that can be provided by multi-function files
+ tmp_mulfunc=
+ case $tmp_fn in
+ add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+ popcount|hamdist) tmp_mulfunc="popham" ;;
+ and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+ tmp_mulfunc="logops_n" ;;
+ esac
+
+ found=no
+ for tmp_dir in $path; do
+ for tmp_base in $tmp_fn $tmp_mulfunc; do
+ for tmp_ext in asm S s c; do
+ tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+ if test -f $tmp_file; then
+ found=yes
+
+ mpn_objects="$mpn_objects ${tmp_fn}.lo"
+ mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_fn}.lo"
+ gmp_links="$gmp_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext"
+
+ # duplicate AC_DEFINEs are harmless, so it doesn't matter
+ # that multi-function files get grepped here repeatedly
+ gmp_ep=["`
+ sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+ sed -n 's/^[ ]*PROLOGUE.*(\(.*\))/\1/p' $tmp_file
+ `"]
+ for gmp_tmp in $gmp_ep; do
+ AC_DEFINE_UNQUOTED(HAVE_NATIVE_${gmp_tmp})
+ done
+
+ break
+ fi
+ done
+ if test $found = yes; then break ; fi
+ done
+ if test $found = yes; then break ; fi
+ done
+
+ if test $found = no; then
+ for tmp_optional in $gmp_mpn_functions_optional; do
+ if test $tmp_optional = $tmp_fn; then
+ found=yes
+ fi
+ done
+ if test $found = no; then
+ AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
+ fi
+ fi
+done
+
+# Create link for gmp-mparam.h.
+for tmp_dir in $path ; do
+ rm -f gmp-mparam.h
+ if test -f $srcdir/mpn/${tmp_dir}/gmp-mparam.h ; then
+ gmp_links="$gmp_links gmp-mparam.h:mpn/${tmp_dir}/gmp-mparam.h"
+
+ # Copy any KARATSUBA_SQR_THRESHOLD in gmp-mparam.h to config.m4.
+ # Some versions of sqr_basecase.asm use this.
+ tmp_gmp_karatsuba_sqr_threshold="`sed -n 's/^#define KARATSUBA_SQR_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $srcdir/mpn/${tmp_dir}/gmp-mparam.h`"
+ if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+ GMP_DEFINE_RAW(["define(<KARATSUBA_SQR_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
+ fi
+
+ break
+ fi
+done
+
+# Dig out the links from `gmp_links' for inclusion in DISTCLEANFILES.
+gmp_srclinks=
+for f in $gmp_links; do
+ gmp_srclinks="$gmp_srclinks `echo $f | sed 's/\(.*\):.*/\1/'`"
+done
+
+AC_SUBST(mpn_objects)
+AC_SUBST(mpn_objs_in_libgmp)
+AC_SUBST(gmp_srclinks)
+
+dnl ----------------------------------------
+dnl Make links.
+AC_CONFIG_LINKS($gmp_links)
+
+dnl Create config.m4.
+GMP_FINISH
+
+dnl Create Makefiles
+dnl FIXME: Upcoming version of autoconf/automake may not like broken lines.
+AC_OUTPUT(Makefile mpz/Makefile mpn/Makefile)
diff --git a/rts/gmp/depcomp b/rts/gmp/depcomp
new file mode 100644
index 0000000000..7906096738
--- /dev/null
+++ b/rts/gmp/depcomp
@@ -0,0 +1,269 @@
+#! /bin/sh
+
+# depcomp - compile a program generating dependencies as side-effects
+# Copyright (C) 1999 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+ echo "depcomp: Variables source, object and depmode must be set" 1>&2
+ exit 1
+fi
+# `libtool' can also be set to `yes' or `no'.
+
+depfile=${depfile-`echo "$object" | sed 's,\([^/]*\)$,.deps/\1,;s/\.\([^.]*\)$/.P\1/'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Some modes work just like other modes, but use different flags. We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write. Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+ # HP compiler uses -M and no extra arg.
+ gccflag=-M
+ depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+ # This is just like dashmstdout with a different argument.
+ dashmflag=-xM
+ depmode=dashmstdout
+fi
+
+case "$depmode" in
+gcc)
+## There are various ways to get dependency output from gcc. Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+## up in a subdir. Having to rename by hand is ugly.
+## (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+## -MM, not -M (despite what the docs say).
+## - Using -M directly means running the compiler twice (even worse
+## than renaming).
+ if test -z "$gccflag"; then
+ gccflag=-MD,
+ fi
+ if "$@" -Wp,"$gccflag$tmpdepfile"; then :
+ else
+ stat=$?
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ sed 's/^[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the `deleted header file' problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header). We avoid this by adding
+## dummy dependencies for each header file. Too bad gcc doesn't do
+## this for us directly.
+ tr ' ' '
+' < "$tmpdepfile" |
+## Some versions of gcc put a space before the `:'. On the theory
+## that the space means something, we add a space to the output as
+## well.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+hp)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+dashmd)
+ # The Java front end to gcc doesn't run cpp, so we can't use the -Wp
+ # trick. Instead we must use -M and then rename the resulting .d
+ # file. This is also the case for older versions of gcc, which
+ # don't implement -Wp.
+ if "$@" -MD; then :
+ else
+ stat=$?
+ rm -f FIXME
+ exit $stat
+ fi
+ FIXME: rewrite the file
+ ;;
+
+sgi)
+ if test "$libtool" = yes; then
+ "$@" "-Wc,-MDupdate,$tmpdepfile"
+ else
+ "$@" -MDupdate "$tmpdepfile"
+ fi
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ stat=$?
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ sed 's/^[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+ tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+#nosideeffect)
+ # This comment above is used by automake to tell side-effect
+ # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the proprocessed file to stdout, regardless of -o,
+ # because we must use -o when running libtool.
+ test -z "$dashmflag" && dashmflag=-M
+ ( IFS=" "
+ case " $* " in
+ *" --mode=compile "*) # this is libtool, let us make it quiet
+ for arg
+ do # cycle over the arguments
+ case "$arg" in
+ "--mode=compile")
+ # insert --quiet before "--mode=compile"
+ set fnord "$@" --quiet
+ shift # fnord
+ ;;
+ esac
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # "$arg"
+ done
+ ;;
+ esac
+ "$@" $dashmflag | sed 's:^[^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
+ ) &
+ proc=$!
+ "$@"
+ stat=$?
+ wait "$proc"
+ if test "$stat" != 0; then exit $stat; fi
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+dashXmstdout)
+ # This case only exists to satisfy depend.m4. It is never actually
+ # run, as this mode is specially recognized in the preamble.
+ exit 1
+ ;;
+
+makedepend)
+ # X makedepend
+ (
+ shift
+ cleared=no
+ for arg in "$@"; do
+ case $cleared in no)
+ set ""; shift
+ cleared=yes
+ esac
+ case "$arg" in
+ -D*|-I*)
+ set fnord "$@" "$arg"; shift;;
+ -*)
+ ;;
+ *)
+ set fnord "$@" "$arg"; shift;;
+ esac
+ done
+ obj_suffix="`echo $object | sed 's/^.*\././'`"
+ touch "$tmpdepfile"
+ ${MAKEDEPEND-makedepend} 2>/dev/null -o"$obj_suffix" -f"$tmpdepfile" "$@"
+ ) &
+ proc=$!
+ "$@"
+ stat=$?
+ wait "$proc"
+ if test "$stat" != 0; then exit $stat; fi
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ tail +3 "$tmpdepfile" | tr ' ' '
+' | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile" "$tmpdepfile".bak
+ ;;
+
+cpp)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the proprocessed file to stdout, regardless of -o,
+ # because we must use -o when running libtool.
+ ( IFS=" "
+ case " $* " in
+ *" --mode=compile "*)
+ for arg
+ do # cycle over the arguments
+ case "$arg" in
+ "--mode=compile")
+ # insert --quiet before "--mode=compile"
+ set fnord "$@" --quiet
+ shift # fnord
+ ;;
+ esac
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # "$arg"
+ done
+ ;;
+ esac
+ "$@" -E |
+ sed -n '/^# [0-9][0-9]* "\([^"]*\)"/ s::'"$object"'\: \1:p' > "$tmpdepfile"
+ ) &
+ proc=$!
+ "$@"
+ stat=$?
+ wait "$proc"
+ if test "$stat" != 0; then exit $stat; fi
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ sed < "$tmpdepfile" -e 's/^[^:]*: //' -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+none)
+ exec "$@"
+ ;;
+
+*)
+ echo "Unknown depmode $depmode" 1>&2
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/rts/gmp/errno.c b/rts/gmp/errno.c
new file mode 100644
index 0000000000..7dd223c19c
--- /dev/null
+++ b/rts/gmp/errno.c
@@ -0,0 +1,26 @@
+/* gmp_errno -- The largest and most complex file in GMP.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int gmp_errno = 0;
diff --git a/rts/gmp/extract-dbl.c b/rts/gmp/extract-dbl.c
new file mode 100644
index 0000000000..2d70d9a3b2
--- /dev/null
+++ b/rts/gmp/extract-dbl.c
@@ -0,0 +1,187 @@
+/* __gmp_extract_double -- convert from double to array of mp_limb_t.
+
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef XDEBUG
+#undef _GMP_IEEE_FLOATS
+#endif
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+/* Extract a non-negative double in d. */
+
+int
+#if __STDC__
+__gmp_extract_double (mp_ptr rp, double d)
+#else
+__gmp_extract_double (rp, d)
+ mp_ptr rp;
+ double d;
+#endif
+{
+ long exp;
+ unsigned sc;
+ mp_limb_t manh, manl;
+
+ /* BUGS
+
+ 1. Should handle Inf and NaN in IEEE specific code.
+ 2. Handle Inf and NaN also in default code, to avoid hangs.
+ 3. Generalize to handle all BITS_PER_MP_LIMB >= 32.
+ 4. This lits is incomplete and misspelled.
+ */
+
+ if (d == 0.0)
+ {
+ rp[0] = 0;
+ rp[1] = 0;
+#if BITS_PER_MP_LIMB == 32
+ rp[2] = 0;
+#endif
+ return 0;
+ }
+
+#if _GMP_IEEE_FLOATS
+ {
+#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
+ /* Work around alpha-specific bug in GCC 2.8.x. */
+ volatile
+#endif
+ union ieee_double_extract x;
+ x.d = d;
+ exp = x.s.exp;
+#if BITS_PER_MP_LIMB == 64
+ manl = (((mp_limb_t) 1 << 63)
+ | ((mp_limb_t) x.s.manh << 43) | ((mp_limb_t) x.s.manl << 11));
+ if (exp == 0)
+ {
+ /* Denormalized number. Don't try to be clever about this,
+ since it is not an important case to make fast. */
+ exp = 1;
+ do
+ {
+ manl = manl << 1;
+ exp--;
+ }
+ while ((mp_limb_signed_t) manl >= 0);
+ }
+#else
+ manh = ((mp_limb_t) 1 << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
+ manl = x.s.manl << 11;
+ if (exp == 0)
+ {
+ /* Denormalized number. Don't try to be clever about this,
+ since it is not an important case to make fast. */
+ exp = 1;
+ do
+ {
+ manh = (manh << 1) | (manl >> 31);
+ manl = manl << 1;
+ exp--;
+ }
+ while ((mp_limb_signed_t) manh >= 0);
+ }
+#endif
+ exp -= 1022; /* Remove IEEE bias. */
+ }
+#else
+ {
+ /* Unknown (or known to be non-IEEE) double format. */
+ exp = 0;
+ if (d >= 1.0)
+ {
+ if (d * 0.5 == d)
+ abort ();
+
+ while (d >= 32768.0)
+ {
+ d *= (1.0 / 65536.0);
+ exp += 16;
+ }
+ while (d >= 1.0)
+ {
+ d *= 0.5;
+ exp += 1;
+ }
+ }
+ else if (d < 0.5)
+ {
+ while (d < (1.0 / 65536.0))
+ {
+ d *= 65536.0;
+ exp -= 16;
+ }
+ while (d < 0.5)
+ {
+ d *= 2.0;
+ exp -= 1;
+ }
+ }
+
+ d *= MP_BASE_AS_DOUBLE;
+#if BITS_PER_MP_LIMB == 64
+ manl = d;
+#else
+ manh = d;
+ manl = (d - manh) * MP_BASE_AS_DOUBLE;
+#endif
+ }
+#endif
+
+ sc = (unsigned) exp % BITS_PER_MP_LIMB;
+
+ /* We add something here to get rounding right. */
+ exp = (exp + 2048) / BITS_PER_MP_LIMB - 2048 / BITS_PER_MP_LIMB + 1;
+
+#if BITS_PER_MP_LIMB == 64
+ if (sc != 0)
+ {
+ rp[1] = manl >> (BITS_PER_MP_LIMB - sc);
+ rp[0] = manl << sc;
+ }
+ else
+ {
+ rp[1] = manl;
+ rp[0] = 0;
+ exp--;
+ }
+#else
+ if (sc != 0)
+ {
+ rp[2] = manh >> (BITS_PER_MP_LIMB - sc);
+ rp[1] = (manl >> (BITS_PER_MP_LIMB - sc)) | (manh << sc);
+ rp[0] = manl << sc;
+ }
+ else
+ {
+ rp[2] = manh;
+ rp[1] = manl;
+ rp[0] = 0;
+ exp--;
+ }
+#endif
+
+ return exp;
+}
diff --git a/rts/gmp/gmp-impl.h b/rts/gmp/gmp-impl.h
new file mode 100644
index 0000000000..3c7ac26e7d
--- /dev/null
+++ b/rts/gmp/gmp-impl.h
@@ -0,0 +1,1072 @@
+/* Include file for internal GNU MP types and definitions.
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+ BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "config.h"
+#include "gmp-mparam.h"
+/* #include "longlong.h" */
+
+/* When using gcc, make sure to use its builtin alloca. */
+#if ! defined (alloca) && defined (__GNUC__)
+#define alloca __builtin_alloca
+#define HAVE_ALLOCA 1
+#endif
+
+/* When using cc, do whatever necessary to allow use of alloca. For many
+ machines, this means including alloca.h. IBM's compilers need a #pragma
+ in "each module that needs to use alloca". */
+#if ! defined (alloca)
+/* We need lots of variants for MIPS, to cover all versions and perversions
+ of OSes for MIPS. */
+#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
+ || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
+ || defined (__alpha) || defined (__sparc) || defined (sparc) \
+ || defined (__ksr__)
+#include <alloca.h>
+#define HAVE_ALLOCA
+#endif
+#if defined (_IBMR2)
+#pragma alloca
+#define HAVE_ALLOCA
+#endif
+#if defined (__DECC)
+#define alloca(x) __ALLOCA(x)
+#define HAVE_ALLOCA
+#endif
+#endif
+
+#if defined (alloca)
+# ifndef HAVE_ALLOCA
+#define HAVE_ALLOCA
+# endif
+#endif
+
+#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
+#include "stack-alloc.h"
+#else
+#define TMP_DECL(m)
+#define TMP_ALLOC(x) alloca(x)
+#define TMP_MARK(m)
+#define TMP_FREE(m)
+#endif
+
+/* Allocating various types. */
+#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
+#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
+#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
+
+
+#if ! defined (__GNUC__) /* FIXME: Test for C++ compilers here,
+ __DECC understands __inline */
+#define inline /* Empty */
+#endif
+
+#define ABS(x) (x >= 0 ? x : -x)
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define MAX(h,i) ((h) > (i) ? (h) : (i))
+#define numberof(x) (sizeof (x) / sizeof ((x)[0]))
+
+/* Field access macros. */
+#define SIZ(x) ((x)->_mp_size)
+#define ABSIZ(x) ABS (SIZ (x))
+#define PTR(x) ((x)->_mp_d)
+#define LIMBS(x) ((x)->_mp_d)
+#define EXP(x) ((x)->_mp_exp)
+#define PREC(x) ((x)->_mp_prec)
+#define ALLOC(x) ((x)->_mp_alloc)
+
+/* Extra casts because shorts are promoted to ints by "~" and "<<". "-1"
+ rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
+ about arithmetic overflow. */
+#define UNSIGNED_TYPE_MAX(type) ((type) ~ (type) 0)
+#define UNSIGNED_TYPE_HIGHBIT(type) ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
+#define SIGNED_TYPE_MIN(type) (((type) -1) << (8*sizeof(type)-1))
+#define SIGNED_TYPE_MAX(type) ((type) ~ SIGNED_TYPE_MIN(type))
+#define SIGNED_TYPE_HIGHBIT(type) SIGNED_TYPE_MIN(type)
+
+#define MP_LIMB_T_MAX UNSIGNED_TYPE_MAX (mp_limb_t)
+#define MP_LIMB_T_HIGHBIT UNSIGNED_TYPE_HIGHBIT (mp_limb_t)
+
+#define MP_SIZE_T_MAX SIGNED_TYPE_MAX (mp_size_t)
+
+#ifndef ULONG_MAX
+#define ULONG_MAX UNSIGNED_TYPE_MAX (unsigned long)
+#endif
+#define ULONG_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned long)
+#define LONG_HIGHBIT SIGNED_TYPE_HIGHBIT (long)
+#ifndef LONG_MAX
+#define LONG_MAX SIGNED_TYPE_MAX (long)
+#endif
+
+#ifndef USHORT_MAX
+#define USHORT_MAX UNSIGNED_TYPE_MAX (unsigned short)
+#endif
+#define USHORT_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned short)
+#define SHORT_HIGHBIT SIGNED_TYPE_HIGHBIT (short)
+#ifndef SHORT_MAX
+#define SHORT_MAX SIGNED_TYPE_MAX (short)
+#endif
+
+
+/* Swap macros. */
+
+#define MP_LIMB_T_SWAP(x, y) \
+ do { \
+ mp_limb_t __mp_limb_t_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_limb_t_swap__tmp; \
+ } while (0)
+#define MP_SIZE_T_SWAP(x, y) \
+ do { \
+ mp_size_t __mp_size_t_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_size_t_swap__tmp; \
+ } while (0)
+
+#define MP_PTR_SWAP(x, y) \
+ do { \
+ mp_ptr __mp_ptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_ptr_swap__tmp; \
+ } while (0)
+#define MP_SRCPTR_SWAP(x, y) \
+ do { \
+ mp_srcptr __mp_srcptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_srcptr_swap__tmp; \
+ } while (0)
+
+#define MPN_PTR_SWAP(xp,xs, yp,ys) \
+ do { \
+ MP_PTR_SWAP (xp, yp); \
+ MP_SIZE_T_SWAP (xs, ys); \
+ } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
+ do { \
+ MP_SRCPTR_SWAP (xp, yp); \
+ MP_SIZE_T_SWAP (xs, ys); \
+ } while(0)
+
+#define MPZ_PTR_SWAP(x, y) \
+ do { \
+ mpz_ptr __mpz_ptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mpz_ptr_swap__tmp; \
+ } while (0)
+#define MPZ_SRCPTR_SWAP(x, y) \
+ do { \
+ mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mpz_srcptr_swap__tmp; \
+ } while (0)
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* FIXME: These are purely internal, so do a search and replace to change
+ them to __gmp forms, rather than using these macros. */
+#define _mp_allocate_func __gmp_allocate_func
+#define _mp_reallocate_func __gmp_reallocate_func
+#define _mp_free_func __gmp_free_func
+#define _mp_default_allocate __gmp_default_allocate
+#define _mp_default_reallocate __gmp_default_reallocate
+#define _mp_default_free __gmp_default_free
+
+extern void * (*_mp_allocate_func) _PROTO ((size_t));
+extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
+extern void (*_mp_free_func) _PROTO ((void *, size_t));
+
+void *_mp_default_allocate _PROTO ((size_t));
+void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
+void _mp_default_free _PROTO ((void *, size_t));
+
+#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
+ ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
+#define _MP_ALLOCATE_FUNC_LIMBS(n) _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)
+
+#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
+#define _MP_FREE_FUNC_LIMBS(p,n) _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
+
+
+#if (__STDC__-0) || defined (__cplusplus)
+
+#else
+
+#define const /* Empty */
+#define signed /* Empty */
+
+#endif
+
+#if defined (__GNUC__) && defined (__i386__)
+#if 0 /* check that these actually improve things */
+#define MPN_COPY_INCR(DST, SRC, N) \
+ __asm__ ("cld\n\trep\n\tmovsl" : : \
+ "D" (DST), "S" (SRC), "c" (N) : \
+ "cx", "di", "si", "memory")
+#define MPN_COPY_DECR(DST, SRC, N) \
+ __asm__ ("std\n\trep\n\tmovsl" : : \
+ "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
+ "cx", "di", "si", "memory")
+#define MPN_NORMALIZE_NOT_ZERO(P, N) \
+ do { \
+ __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \
+ "a" (0), "D" ((P) + (N) - 1), "0" (N) : \
+ "cx", "di"); \
+ (N)++; \
+ } while (0)
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+#define mpn_copyi __MPN(copyi)
+void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+/* Remap names of internal mpn functions. */
+#define __clz_tab __MPN(clz_tab)
+#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
+#define mpn_reciprocal __MPN(reciprocal)
+
+#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
+#define mpn_bz_divrem_n __MPN(bz_divrem_n)
+/* #define mpn_tdiv_q __MPN(tdiv_q) */
+
+#define mpn_kara_mul_n __MPN(kara_mul_n)
+void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_kara_sqr_n __MPN(kara_sqr_n)
+void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_toom3_mul_n __MPN(toom3_mul_n)
+void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
+
+#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
+void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_fft_best_k __MPN(fft_best_k)
+int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));
+
+#define mpn_mul_fft __MPN(mul_fft)
+void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml,
+ int k));
+
+#define mpn_mul_fft_full __MPN(mul_fft_full)
+void mpn_mul_fft_full _PROTO ((mp_ptr op,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml));
+
+#define mpn_fft_next_size __MPN(fft_next_size)
+mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));
+
+mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
+
+/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed. */
+#ifndef MPN_COPY_INCR
+#if HAVE_NATIVE_mpn_copyi
+#define MPN_COPY_INCR(DST, SRC, NLIMBS) mpn_copyi (DST, SRC, NLIMBS)
+#else
+#define MPN_COPY_INCR(DST, SRC, NLIMBS) \
+ do { \
+ mp_size_t __i; \
+ for (__i = 0; __i < (NLIMBS); __i++) \
+ (DST)[__i] = (SRC)[__i]; \
+ } while (0)
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_copyd
+#define mpn_copyd __MPN(copyd)
+void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+/* NLIMBS==0 allowed */
+#ifndef MPN_COPY_DECR
+#if HAVE_NATIVE_mpn_copyd
+#define MPN_COPY_DECR(DST, SRC, NLIMBS) mpn_copyd (DST, SRC, NLIMBS)
+#else
+#define MPN_COPY_DECR(DST, SRC, NLIMBS) \
+ do { \
+ mp_size_t __i; \
+ for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
+ (DST)[__i] = (SRC)[__i]; \
+ } while (0)
+#endif
+#endif
+
+/* Define MPN_COPY for vector computers. Since #pragma cannot be in a macro,
+ rely on function inlining. */
+#if defined (_CRAY) || defined (__uxp__)
+static inline void
+_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
+{
+ int i; /* Faster for Cray with plain int */
+#pragma _CRI ivdep /* Cray PVP systems */
+#pragma loop noalias d,s /* Fujitsu VPP systems */
+ for (i = 0; i < n; i++)
+ d[i] = s[i];
+}
+#define MPN_COPY _MPN_COPY
+#endif
+
+#ifndef MPN_COPY
+#define MPN_COPY MPN_COPY_INCR
+#endif
+
+/* Zero NLIMBS *limbs* AT DST. */
+#ifndef MPN_ZERO
+#define MPN_ZERO(DST, NLIMBS) \
+ do { \
+ mp_size_t __i; \
+ for (__i = 0; __i < (NLIMBS); __i++) \
+ (DST)[__i] = 0; \
+ } while (0)
+#endif
+
+#ifndef MPN_NORMALIZE
+#define MPN_NORMALIZE(DST, NLIMBS) \
+ do { \
+ while (NLIMBS > 0) \
+ { \
+ if ((DST)[(NLIMBS) - 1] != 0) \
+ break; \
+ NLIMBS--; \
+ } \
+ } while (0)
+#endif
+#ifndef MPN_NORMALIZE_NOT_ZERO
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
+ do { \
+ while (1) \
+ { \
+ if ((DST)[(NLIMBS) - 1] != 0) \
+ break; \
+ NLIMBS--; \
+ } \
+ } while (0)
+#endif
+
+/* Strip least significant zero limbs from ptr,size by incrementing ptr and
+ decrementing size. The number in ptr,size must be non-zero, ie. size!=0
+ and somewhere a non-zero limb. */
+#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
+ do \
+ { \
+ ASSERT ((size) != 0); \
+ while ((ptr)[0] == 0) \
+ { \
+ (ptr)++; \
+ (size)--; \
+ ASSERT (size >= 0); \
+ } \
+ } \
+ while (0)
+
+/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
+ temporary variable; it will be automatically cleared out at function
+ return. We use __x here to make it possible to accept both mpz_ptr and
+ mpz_t arguments. */
+#define MPZ_TMP_INIT(X, NLIMBS) \
+ do { \
+ mpz_ptr __x = (X); \
+ __x->_mp_alloc = (NLIMBS); \
+ __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
+ } while (0)
+
+/* Realloc for an mpz_t WHAT if it has less thann NEEDED limbs. */
+#define MPZ_REALLOC(what,needed) \
+ do { \
+ if ((needed) > ALLOC (what)) \
+ _mpz_realloc (what, needed); \
+ } while (0)
+
+/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
+ value which is good on most machines. */
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 32
+#endif
+
+/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
+ value which is good on most machines. */
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 256
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
+#endif
+
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
+#endif
+
+/* First k to use for an FFT modF multiply. A modF FFT is an order
+ log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
+ whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
+#define FFT_FIRST_K 4
+
+/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD (TOOM3_MUL_THRESHOLD * 3)
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD (TOOM3_SQR_THRESHOLD * 3)
+#endif
+
+/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
+ will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
+ NxN->2N multiply and not recursing into itself is an order
+ log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
+ is the first better than toom3. */
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD (FFT_MODF_MUL_THRESHOLD * 10)
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD (FFT_MODF_SQR_THRESHOLD * 10)
+#endif
+
+/* Table of thresholds for successive modF FFT "k"s. The first entry is
+ where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
+ etc. See mpn_fft_best_k(). */
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE \
+ { TOOM3_MUL_THRESHOLD * 4, /* k=5 */ \
+ TOOM3_MUL_THRESHOLD * 8, /* k=6 */ \
+ TOOM3_MUL_THRESHOLD * 16, /* k=7 */ \
+ TOOM3_MUL_THRESHOLD * 32, /* k=8 */ \
+ TOOM3_MUL_THRESHOLD * 96, /* k=9 */ \
+ TOOM3_MUL_THRESHOLD * 288, /* k=10 */ \
+ 0 }
+#endif
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE \
+ { TOOM3_SQR_THRESHOLD * 4, /* k=5 */ \
+ TOOM3_SQR_THRESHOLD * 8, /* k=6 */ \
+ TOOM3_SQR_THRESHOLD * 16, /* k=7 */ \
+ TOOM3_SQR_THRESHOLD * 32, /* k=8 */ \
+ TOOM3_SQR_THRESHOLD * 96, /* k=9 */ \
+ TOOM3_SQR_THRESHOLD * 288, /* k=10 */ \
+ 0 }
+#endif
+
+#ifndef FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS static const
+#endif
+
+#define MPN_FFT_TABLE_SIZE 16
+
+
+/* Return non-zero if xp,xsize and yp,ysize overlap.
+ If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
+ overlap. If both these are false, there's an overlap. */
+#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
+ ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
+
+
+/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
+ ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
+ does it always. Generally assertions are meant for development, but
+ might help when looking for a problem later too.
+
+ ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
+ assertion checking is disabled, the expression is still evaluated. This
+ is meant for use with routines like mpn_add_n() where the return value
+ represents a carry or whatever that shouldn't occur. For example,
+ ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
+
+#ifdef __LINE__
+#define ASSERT_LINE __LINE__
+#else
+#define ASSERT_LINE -1
+#endif
+
+#ifdef __FILE__
+#define ASSERT_FILE __FILE__
+#else
+#define ASSERT_FILE ""
+#endif
+
+int __gmp_assert_fail _PROTO((const char *filename, int linenum,
+ const char *expr));
+
+#if HAVE_STRINGIZE
+#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
+#else
+#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
+#endif
+
+#if HAVE_VOID
+#define CAST_TO_VOID (void)
+#else
+#define CAST_TO_VOID
+#endif
+
+#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))
+
+#if WANT_ASSERT
+#define ASSERT(expr) ASSERT_ALWAYS (expr)
+#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
+
+#else
+#define ASSERT(expr) (CAST_TO_VOID 0)
+#define ASSERT_NOCARRY(expr) (expr)
+#endif
+
+
+#if HAVE_NATIVE_mpn_com_n
+#define mpn_com_n __MPN(com_n)
+void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_com_n(d,s,n) \
+ do \
+ { \
+ mp_ptr __d = (d); \
+ mp_srcptr __s = (s); \
+ mp_size_t __n = (n); \
+ do \
+ *__d++ = ~ *__s++; \
+ while (--__n); \
+ } \
+ while (0)
+#endif
+
+#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op) \
+ do \
+ { \
+ mp_ptr __d = (d); \
+ mp_srcptr __s1 = (s1); \
+ mp_srcptr __s2 = (s2); \
+ mp_size_t __n = (n); \
+ do \
+ *__d++ = dop (*__s1++ op s2op *__s2++); \
+ while (--__n); \
+ } \
+ while (0)
+
+#if HAVE_NATIVE_mpn_and_n
+#define mpn_and_n __MPN(and_n)
+void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
+#endif
+
+#if HAVE_NATIVE_mpn_andn_n
+#define mpn_andn_n __MPN(andn_n)
+void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
+#endif
+
+#if HAVE_NATIVE_mpn_nand_n
+#define mpn_nand_n __MPN(nand_n)
+void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
+#endif
+
+#if HAVE_NATIVE_mpn_ior_n
+#define mpn_ior_n __MPN(ior_n)
+void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
+#endif
+
+#if HAVE_NATIVE_mpn_iorn_n
+#define mpn_iorn_n __MPN(iorn_n)
+void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
+#endif
+
+#if HAVE_NATIVE_mpn_nior_n
+#define mpn_nior_n __MPN(nior_n)
+void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
+#endif
+
+#if HAVE_NATIVE_mpn_xor_n
+#define mpn_xor_n __MPN(xor_n)
+void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
+#endif
+
+#if HAVE_NATIVE_mpn_xnor_n
+#define mpn_xnor_n __MPN(xnor_n)
+void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
+#endif
+
+/* Structure for conversion between internal binary format and
+ strings in base 2..36. */
+struct bases
+{
+ /* Number of digits in the conversion base that always fits in an mp_limb_t.
+ For example, for base 10 on a machine where a mp_limb_t has 32 bits this
+ is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
+ int chars_per_limb;
+
+ /* log(2)/log(conversion_base) */
+ double chars_per_bit_exactly;
+
+ /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
+ factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
+ i.e. the number of bits used to represent each digit in the base. */
+ mp_limb_t big_base;
+
+ /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
+ fixed-point number. Instead of dividing by big_base an application can
+ choose to multiply by big_base_inverted. */
+ mp_limb_t big_base_inverted;
+};
+
+#define __mp_bases __MPN(mp_bases)
+extern const struct bases __mp_bases[];
+extern mp_size_t __gmp_default_fp_limb_precision;
+
+#if defined (__i386__)
+#define TARGET_REGISTER_STARVED 1
+#else
+#define TARGET_REGISTER_STARVED 0
+#endif
+
+/* Use a library function for invert_limb, if available. */
+#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
+#define mpn_invert_limb __MPN(invert_limb)
+mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
+#define invert_limb(invxl,xl) (invxl = __MPN(invert_limb) (xl))
+#endif
+
+#ifndef invert_limb
+#define invert_limb(invxl,xl) \
+ do { \
+ mp_limb_t dummy; \
+ if (xl << 1 == 0) \
+ invxl = ~(mp_limb_t) 0; \
+ else \
+ udiv_qrnnd (invxl, dummy, -xl, 0, xl); \
+ } while (0)
+#endif
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ If this would yield overflow, DI should be the largest possible number
+ (i.e., only ones). For correct operation, the most significant bit of D
+ has to be set. Put the quotient in Q and the remainder in R. */
+#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
+ do { \
+ mp_limb_t _q, _ql, _r; \
+ mp_limb_t _xh, _xl; \
+ umul_ppmm (_q, _ql, (nh), (di)); \
+ _q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */\
+ umul_ppmm (_xh, _xl, _q, (d)); \
+ sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
+ if (_xh != 0) \
+ { \
+ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
+ _q += 1; \
+ if (_xh != 0) \
+ { \
+ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
+ _q += 1; \
+ } \
+ } \
+ if (_r >= (d)) \
+ { \
+ _r -= (d); \
+ _q += 1; \
+ } \
+ (r) = _r; \
+ (q) = _q; \
+ } while (0)
+/* Like udiv_qrnnd_preinv, but for for any value D. DNORM is D shifted left
+ so that its most significant bit is set. LGUP is ceil(log2(D)). */
+#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
+ do { \
+ mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
+ mp_limb_t _xh, _xl; \
+ _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
+ _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
+ _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
+ _nadj = _n10 + (_n1 & (dnorm)); \
+ umul_ppmm (_xh, _xl, di, _n2 - _n1); \
+ add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
+ _q1 = ~(_n2 + _xh); \
+ umul_ppmm (_xh, _xl, _q1, d); \
+ add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
+ _xh -= (d); \
+ (r) = _xl + ((d) & _xh); \
+ (q) = _xh - _q1; \
+ } while (0)
+/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
+ version to use. */
+#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
+ do { \
+ mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
+ mp_limb_t _xh, _xl; \
+ _n2 = (nh); \
+ _n10 = (nl); \
+ _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
+ _nadj = _n10 + (_n1 & (d)); \
+ umul_ppmm (_xh, _xl, di, _n2 - _n1); \
+ add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
+ _q1 = ~(_n2 + _xh); \
+ umul_ppmm (_xh, _xl, _q1, d); \
+ add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
+ _xh -= (d); \
+ (r) = _xl + ((d) & _xh); \
+ (q) = _xh - _q1; \
+ } while (0)
+
+
+/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
+ 2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
+ "n" must be odd (otherwise such an inverse doesn't exist).
+
+ This is not to be confused with invert_limb(), which is completely
+ different.
+
+ The table lookup gives an inverse with the low 8 bits valid, and each
+ multiply step doubles the number of bits. See Jebelean's exact division
+ paper, end of section 4 (reference in gmp.texi). */
+
+#define modlimb_invert_table __gmp_modlimb_invert_table
+extern const unsigned char modlimb_invert_table[128];
+
+#if BITS_PER_MP_LIMB <= 32
+#define modlimb_invert(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); \
+ __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+#endif
+
+#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
+#define modlimb_invert(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); \
+ __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+#endif
+
+
+/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
+ between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
+ until then. */
+#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
+/* Define stuff for longlong.h. */
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#if defined _LONGLONG || defined _LONG_LONG_LIMB
+typedef long long int DItype;
+typedef unsigned long long int UDItype;
+#else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
+typedef long int DItype;
+typedef unsigned long int UDItype;
+#endif
+#endif
+
+typedef mp_limb_t UWtype;
+typedef unsigned int UHWtype;
+#define W_TYPE_SIZE BITS_PER_MP_LIMB
+
+/* Define ieee_double_extract and _GMP_IEEE_FLOATS. */
+
+#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
+/* Special case for little endian ARM since floats remain in big-endian. */
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned int manh:20;
+ unsigned int exp:11;
+ unsigned int sig:1;
+ unsigned int manl:32;
+ } s;
+ double d;
+};
+#else
+#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
+ || defined (__alpha) \
+ || defined (__clipper__) \
+ || defined (__cris) \
+ || defined (__i386__) \
+ || defined (__i860__) \
+ || defined (__i960__) \
+ || defined (MIPSEL) || defined (_MIPSEL) \
+ || defined (__ns32000__) \
+ || defined (__WINNT) || defined (_WIN32)
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned int manl:32;
+ unsigned int manh:20;
+ unsigned int exp:11;
+ unsigned int sig:1;
+ } s;
+ double d;
+};
+#else /* Need this as an #else since the tests aren't made exclusive. */
+#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
+ || defined (__a29k__) || defined (_AM29K) \
+ || defined (__arm__) \
+ || (defined (__convex__) && defined (_IEEE_FLOAT_)) \
+ || defined (_CRAYMPP) \
+ || defined (__i370__) || defined (__mvs__) \
+ || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
+ || defined(mc68020) \
+ || defined (__m88000__) \
+ || defined (MIPSEB) || defined (_MIPSEB) \
+ || defined (__hppa) || defined (__hppa__) \
+ || defined (__pyr__) \
+ || defined (__ibm032__) \
+ || defined (_IBMR2) || defined (_ARCH_PPC) \
+ || defined (__sh__) \
+ || defined (__sparc) || defined (sparc) \
+ || defined (__we32k__)
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned int sig:1;
+ unsigned int exp:11;
+ unsigned int manh:20;
+ unsigned int manl:32;
+ } s;
+ double d;
+};
+#endif
+#endif
+#endif
+
+/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
+ SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
+ presumably due to treating the mp_limb_t constant as signed rather than
+ unsigned. */
+#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
+#if BITS_PER_MP_LIMB == 64
+#define LIMBS_PER_DOUBLE 2
+#else
+#define LIMBS_PER_DOUBLE 3
+#endif
+
+double __gmp_scale2 _PROTO ((double, int));
+int __gmp_extract_double _PROTO ((mp_ptr, double));
+
+extern int __gmp_junk;
+extern const int __gmp_0;
+#define GMP_ERROR(code) (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
+#define DIVIDE_BY_ZERO GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
+#define SQRT_OF_NEGATIVE GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
+
+#if defined _LONG_LONG_LIMB
+#if defined (__STDC__)
+#define CNST_LIMB(C) C##LL
+#else
+#define CNST_LIMB(C) C/**/LL
+#endif
+#else /* not _LONG_LONG_LIMB */
+#if defined (__STDC__)
+#define CNST_LIMB(C) C##L
+#else
+#define CNST_LIMB(C) C/**/L
+#endif
+#endif /* _LONG_LONG_LIMB */
+
+/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
+#if BITS_PER_MP_LIMB == 32
+#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
+#define PP_INVERTED 0x53E5645CL
+#define PP_MAXPRIME 29
+#define PP_MASK 0x208A28A8L
+#endif
+
+#if BITS_PER_MP_LIMB == 64
+#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
+#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
+#define PP_MAXPRIME 53
+#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
+#endif
+
+
+/* BIT1 means a result value in bit 1 (second least significant bit), with a
+ zero bit representing +1 and a one bit representing -1. Bits other than
+ bit 1 are garbage.
+
+ JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
+ and their speed is important. Expressions are used rather than
+ conditionals to accumulate sign changes, which effectively means XORs
+ instead of conditional JUMPs. */
+
+/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_S0(a) \
+ (((a) == 1) | ((a) == -1))
+
+/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_U0(a) \
+ ((a) == 1)
+
+/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
+ An mpz_t always has at least one limb of allocated space, so the fetch of
+ the low limb is valid. */
+#define JACOBI_Z0(a) \
+ (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))
+
+/* Convert a bit1 to +1 or -1. */
+#define JACOBI_BIT1_TO_PN(result_bit1) \
+ (1 - ((result_bit1) & 2))
+
+/* (2/b), with b unsigned and odd;
+ is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
+ hence obtained from (b>>1)^b */
+#define JACOBI_TWO_U_BIT1(b) \
+ (ASSERT (b & 1), (((b) >> 1) ^ (b)))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U_BIT1(twos, b) \
+ (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U(twos, b) \
+ (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
+
+/* (a/b) effect due to sign of a: signed/unsigned, b odd;
+ is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
+#define JACOBI_ASGN_SU_BIT1(a, b) \
+ ((((a) < 0) << 1) & (b))
+
+/* (a/b) effect due to sign of b: signed/mpz;
+ is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_SZ_BIT1(a, b) \
+ ((((a) < 0) & (SIZ(b) < 0)) << 1)
+
+/* (a/b) effect due to sign of b: mpz/signed */
+#define JACOBI_BSGN_ZS_BIT1(a, b) \
+ JACOBI_BSGN_SZ_BIT1(b, a)
+
+/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
+ Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
+ both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
+ because this is used in a couple of places with only bit 1 of a or b
+ valid. */
+#define JACOBI_RECIP_UU_BIT1(a, b) \
+ ((a) & (b))
+
+
+/* For testing and debugging. */
+#define MPZ_CHECK_FORMAT(z) \
+ (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0), \
+ ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
+#define MPZ_PROVOKE_REALLOC(z) \
+ do { ALLOC(z) = ABSIZ(z); } while (0)
+
+
+#if TUNE_PROGRAM_BUILD
+/* Some extras wanted when recompiling some .c files for use by the tune
+ program. Not part of a normal build. */
+
+extern mp_size_t mul_threshold[];
+extern mp_size_t fft_modf_mul_threshold;
+extern mp_size_t sqr_threshold[];
+extern mp_size_t fft_modf_sqr_threshold;
+extern mp_size_t bz_threshold[];
+extern mp_size_t fib_threshold[];
+extern mp_size_t powm_threshold[];
+extern mp_size_t gcd_accel_threshold[];
+extern mp_size_t gcdext_threshold[];
+
+#undef KARATSUBA_MUL_THRESHOLD
+#undef TOOM3_MUL_THRESHOLD
+#undef FFT_MUL_TABLE
+#undef FFT_MUL_THRESHOLD
+#undef FFT_MODF_MUL_THRESHOLD
+#undef KARATSUBA_SQR_THRESHOLD
+#undef TOOM3_SQR_THRESHOLD
+#undef FFT_SQR_TABLE
+#undef FFT_SQR_THRESHOLD
+#undef FFT_MODF_SQR_THRESHOLD
+#undef BZ_THRESHOLD
+#undef FIB_THRESHOLD
+#undef POWM_THRESHOLD
+#undef GCD_ACCEL_THRESHOLD
+#undef GCDEXT_THRESHOLD
+
+#define KARATSUBA_MUL_THRESHOLD mul_threshold[0]
+#define TOOM3_MUL_THRESHOLD mul_threshold[1]
+#define FFT_MUL_TABLE 0
+#define FFT_MUL_THRESHOLD mul_threshold[2]
+#define FFT_MODF_MUL_THRESHOLD fft_modf_mul_threshold
+#define KARATSUBA_SQR_THRESHOLD sqr_threshold[0]
+#define TOOM3_SQR_THRESHOLD sqr_threshold[1]
+#define FFT_SQR_TABLE 0
+#define FFT_SQR_THRESHOLD sqr_threshold[2]
+#define FFT_MODF_SQR_THRESHOLD fft_modf_sqr_threshold
+#define BZ_THRESHOLD bz_threshold[0]
+#define FIB_THRESHOLD fib_threshold[0]
+#define POWM_THRESHOLD powm_threshold[0]
+#define GCD_ACCEL_THRESHOLD gcd_accel_threshold[0]
+#define GCDEXT_THRESHOLD gcdext_threshold[0]
+
+#define TOOM3_MUL_THRESHOLD_LIMIT 700
+
+#undef FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS
+extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
+
+#endif /* TUNE_PROGRAM_BUILD */
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/rts/gmp/gmp.h b/rts/gmp/gmp.h
new file mode 100644
index 0000000000..0f1b9510e9
--- /dev/null
+++ b/rts/gmp/gmp.h
@@ -0,0 +1,1083 @@
+/* gmp.h -- Definitions for GNU multiple precision functions.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifndef __GMP_H__
+
+#ifndef __GNU_MP__ /* to allow inclusion of both gmp.h and mp.h */
+#define __GNU_MP__ 2
+#define __need_size_t
+#include <stddef.h>
+#undef __need_size_t
+
+#ifndef STG_H
+/* Get DLL_IMPORT */
+#include "../../includes/ghcconfig.h"
+#include "../../includes/StgDLL.h"
+#endif
+
+#if defined (__mips) && defined (_ABIN32)
+/* Force the use of 64-bit limbs for all 64-bit MIPS CPUs if ABI permits. */
+#define _LONG_LONG_LIMB
+#endif
+
+#if (__STDC__-0) || defined (__cplusplus)
+#define __gmp_const const
+#define __gmp_signed signed
+#else
+#define __gmp_const
+#define __gmp_signed
+#endif
+
+#if defined (__GNUC__)
+#define __gmp_inline __inline__
+#else
+#define __gmp_inline
+#endif
+
+#ifndef _EXTERN_INLINE
+#ifdef __GNUC__
+#define _EXTERN_INLINE extern __inline__
+#else
+#define _EXTERN_INLINE static
+#endif
+#endif
+
+#ifdef _SHORT_LIMB
+typedef unsigned int mp_limb_t;
+typedef int mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb_t;
+typedef long long int mp_limb_signed_t;
+#else
+typedef unsigned long int mp_limb_t;
+typedef long int mp_limb_signed_t;
+#endif
+#endif
+
+typedef mp_limb_t * mp_ptr;
+typedef __gmp_const mp_limb_t * mp_srcptr;
+#if defined (_CRAY) && ! defined (_CRAYMPP)
+/* plain `int' is much faster (48 bits) */
+typedef int mp_size_t;
+typedef int mp_exp_t;
+#else
+typedef long int mp_size_t;
+typedef long int mp_exp_t;
+#endif
+
+typedef struct
+{
+ int _mp_alloc; /* Number of *limbs* allocated and pointed
+ to by the _mp_d field. */
+ int _mp_size; /* abs(_mp_size) is the number of limbs the
+ last field points to. If _mp_size is
+ negative this is a negative number. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpz_struct;
+#endif /* __GNU_MP__ */
+
+typedef __mpz_struct MP_INT;
+typedef __mpz_struct mpz_t[1];
+
+typedef struct
+{
+ __mpz_struct _mp_num;
+ __mpz_struct _mp_den;
+} __mpq_struct;
+
+typedef __mpq_struct MP_RAT;
+typedef __mpq_struct mpq_t[1];
+
+typedef struct
+{
+ int _mp_prec; /* Max precision, in number of `mp_limb_t's.
+ Set by mpf_init and modified by
+ mpf_set_prec. The area pointed to by the
+ _mp_d field contains `prec' + 1 limbs. */
+ int _mp_size; /* abs(_mp_size) is the number of limbs the
+ last field points to. If _mp_size is
+ negative this is a negative number. */
+ mp_exp_t _mp_exp; /* Exponent, in the base of `mp_limb_t'. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpf_struct;
+
+/* typedef __mpf_struct MP_FLOAT; */
+typedef __mpf_struct mpf_t[1];
+
+/* Available random number generation algorithms. */
+typedef enum
+{
+ GMP_RAND_ALG_DEFAULT = 0,
+ GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential. */
+} gmp_randalg_t;
+
+/* Linear congruential data struct. */
+typedef struct {
+ mpz_t a; /* Multiplier. */
+ unsigned long int c; /* Adder. */
+ mpz_t m; /* Modulus (valid only if m2exp == 0). */
+ unsigned long int m2exp; /* If != 0, modulus is 2 ^ m2exp. */
+} __gmp_randata_lc;
+
+/* Random state struct. */
+typedef struct
+{
+ mpz_t seed; /* Current seed. */
+ gmp_randalg_t alg; /* Algorithm used. */
+ union { /* Algorithm specific data. */
+ __gmp_randata_lc *lc; /* Linear congruential. */
+ } algdata;
+} __gmp_randstate_struct;
+typedef __gmp_randstate_struct gmp_randstate_t[1];
+
+/* Types for function declarations in gmp files. */
+/* ??? Should not pollute user name space with these ??? */
+typedef __gmp_const __mpz_struct *mpz_srcptr;
+typedef __mpz_struct *mpz_ptr;
+typedef __gmp_const __mpf_struct *mpf_srcptr;
+typedef __mpf_struct *mpf_ptr;
+typedef __gmp_const __mpq_struct *mpq_srcptr;
+typedef __mpq_struct *mpq_ptr;
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+#ifndef __MPN
+/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */
+#if defined (__STDC__) || defined (__cplusplus)
+#define __MPN(x) __gmpn_##x
+#else
+#define __MPN(x) __gmpn_/**/x
+#endif
+#endif
+
+#if defined (FILE) || defined (H_STDIO) || defined (_H_STDIO) \
+ || defined (_STDIO_H) || defined (_STDIO_H_) || defined (__STDIO_H__) \
+ || defined (_STDIO_INCLUDED) || defined (__dj_include_stdio_h_)
+#define _GMP_H_HAVE_FILE 1
+#endif
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+DLL_IMPORT void mp_set_memory_functions _PROTO ((void *(*) (size_t),
+ void *(*) (void *, size_t, size_t),
+ void (*) (void *, size_t)));
+
+#define mp_bits_per_limb __gmp_bits_per_limb
+DLL_IMPORT extern __gmp_const int mp_bits_per_limb;
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/**************** Random number routines. ****************/
+
+#define _gmp_rand __gmp_rand
+#define gmp_randinit __gmp_randinit
+#define gmp_randinit_lc __gmp_randinit_lc
+#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp
+#define gmp_randseed __gmp_randseed
+#define gmp_randseed_ui __gmp_randseed_ui
+#define gmp_randclear __gmp_randclear
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+DLL_IMPORT void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));
+DLL_IMPORT void gmp_randinit _PROTO ((gmp_randstate_t, gmp_randalg_t, ...));
+DLL_IMPORT void gmp_randinit_lc _PROTO ((gmp_randstate_t, mpz_t, unsigned long int,
+ mpz_t));
+DLL_IMPORT void gmp_randinit_lc_2exp _PROTO ((gmp_randstate_t, mpz_t, unsigned long int,
+ unsigned long int));
+DLL_IMPORT void gmp_randseed _PROTO ((gmp_randstate_t, mpz_t));
+DLL_IMPORT void gmp_randseed_ui _PROTO ((gmp_randstate_t, unsigned long int));
+DLL_IMPORT void gmp_randclear _PROTO ((gmp_randstate_t));
+
+#if defined (__cplusplus)
+}
+#endif
+
+/**************** Integer (i.e. Z) routines. ****************/
+
+#define _mpz_realloc __gmpz_realloc
+#define mpz_realloc __gmpz_realloc
+#define mpz_abs __gmpz_abs
+#define mpz_add __gmpz_add
+#define mpz_add_ui __gmpz_add_ui
+#define mpz_addmul_ui __gmpz_addmul_ui
+#define mpz_and __gmpz_and
+#define mpz_array_init __gmpz_array_init
+#define mpz_bin_ui __gmpz_bin_ui
+#define mpz_bin_uiui __gmpz_bin_uiui
+#define mpz_cdiv_q __gmpz_cdiv_q
+#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui
+#define mpz_cdiv_qr __gmpz_cdiv_qr
+#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui
+#define mpz_cdiv_r __gmpz_cdiv_r
+#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui
+#define mpz_cdiv_ui __gmpz_cdiv_ui
+#define mpz_clear __gmpz_clear
+#define mpz_clrbit __gmpz_clrbit
+#define mpz_cmp __gmpz_cmp
+#define _mpz_cmp_si __gmpz_cmp_si
+#define _mpz_cmp_ui __gmpz_cmp_ui
+#define mpz_cmpabs __gmpz_cmpabs
+#define mpz_cmpabs_ui __gmpz_cmpabs_ui
+#define mpz_com __gmpz_com
+#define mpz_divexact __gmpz_divexact
+#define mpz_dump __gmpz_dump
+#define mpz_fac_ui __gmpz_fac_ui
+#define mpz_fdiv_q __gmpz_fdiv_q
+#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp
+#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui
+#define mpz_fdiv_qr __gmpz_fdiv_qr
+#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui
+#define mpz_fdiv_r __gmpz_fdiv_r
+#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp
+#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui
+#define mpz_fdiv_ui __gmpz_fdiv_ui
+#define mpz_fib_ui __gmpz_fib_ui
+#define mpz_fits_sint_p __gmpz_fits_sint_p
+#define mpz_fits_slong_p __gmpz_fits_slong_p
+#define mpz_fits_sshort_p __gmpz_fits_sshort_p
+#define mpz_fits_uint_p __gmpz_fits_uint_p
+#define mpz_fits_ulong_p __gmpz_fits_ulong_p
+#define mpz_fits_ushort_p __gmpz_fits_ushort_p
+#define mpz_gcd __gmpz_gcd
+#define mpz_gcd_ui __gmpz_gcd_ui
+#define mpz_gcdext __gmpz_gcdext
+#define mpz_get_d __gmpz_get_d
+#define mpz_get_si __gmpz_get_si
+#define mpz_get_str __gmpz_get_str
+#define mpz_get_ui __gmpz_get_ui
+#define mpz_getlimbn __gmpz_getlimbn
+#define mpz_hamdist __gmpz_hamdist
+#define mpz_init __gmpz_init
+#define mpz_inp_binary __gmpz_inp_binary
+#define mpz_inp_raw __gmpz_inp_raw
+#define mpz_inp_str __gmpz_inp_str
+#define mpz_init_set __gmpz_init_set
+#define mpz_init_set_d __gmpz_init_set_d
+#define mpz_init_set_si __gmpz_init_set_si
+#define mpz_init_set_str __gmpz_init_set_str
+#define mpz_init_set_ui __gmpz_init_set_ui
+#define mpz_invert __gmpz_invert
+#define mpz_ior __gmpz_ior
+#define mpz_jacobi __gmpz_jacobi
+#define mpz_lcm __gmpz_lcm
+#define mpz_legendre __gmpz_legendre
+#define mpz_mod __gmpz_mod
+#define mpz_mul __gmpz_mul
+#define mpz_mul_2exp __gmpz_mul_2exp
+#define mpz_neg __gmpz_neg
+#define mpz_nextprime __gmpz_nextprime
+#define mpz_out_binary __gmpz_out_binary
+#define mpz_out_raw __gmpz_out_raw
+#define mpz_out_str __gmpz_out_str
+#define mpz_perfect_power_p __gmpz_perfect_power_p
+#define mpz_perfect_square_p __gmpz_perfect_square_p
+#define mpz_popcount __gmpz_popcount
+#define mpz_pow_ui __gmpz_pow_ui
+#define mpz_powm __gmpz_powm
+#define mpz_powm_ui __gmpz_powm_ui
+#define mpz_probab_prime_p __gmpz_probab_prime_p
+#define mpz_random __gmpz_random
+#define mpz_random2 __gmpz_random2
+#define mpz_remove __gmpz_remove
+#define mpz_root __gmpz_root
+#define mpz_rrandomb __gmpz_rrandomb
+#define mpz_scan0 __gmpz_scan0
+#define mpz_scan1 __gmpz_scan1
+#define mpz_set __gmpz_set
+#define mpz_set_d __gmpz_set_d
+#define mpz_set_f __gmpz_set_f
+#define mpz_set_q __gmpz_set_q
+#define mpz_set_si __gmpz_set_si
+#define mpz_set_str __gmpz_set_str
+#define mpz_set_ui __gmpz_set_ui
+#define mpz_setbit __gmpz_setbit
+#define mpz_size __gmpz_size
+#define mpz_sizeinbase __gmpz_sizeinbase
+#define mpz_sqrt __gmpz_sqrt
+#define mpz_sqrtrem __gmpz_sqrtrem
+#define mpz_sub __gmpz_sub
+#define mpz_sub_ui __gmpz_sub_ui
+#define mpz_swap __gmpz_swap
+#define mpz_tdiv_ui __gmpz_tdiv_ui
+#define mpz_tdiv_q __gmpz_tdiv_q
+#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp
+#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui
+#define mpz_tdiv_qr __gmpz_tdiv_qr
+#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui
+#define mpz_tdiv_r __gmpz_tdiv_r
+#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp
+#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui
+#define mpz_tstbit __gmpz_tstbit
+#define mpz_ui_pow_ui __gmpz_ui_pow_ui
+#define mpz_urandomb __gmpz_urandomb
+#define mpz_urandomm __gmpz_urandomm
+#define mpz_xor __gmpz_xor
+#define mpz_eor __gmpz_xor
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+DLL_IMPORT void *_mpz_realloc _PROTO ((mpz_ptr, mp_size_t));
+
+DLL_IMPORT void mpz_abs _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_add _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_add_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_addmul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_and _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_array_init _PROTO ((mpz_ptr, mp_size_t, mp_size_t));
+DLL_IMPORT void mpz_bin_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_bin_uiui _PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+DLL_IMPORT void mpz_cdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_cdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_cdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_cdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_cdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_cdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_cdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_clear _PROTO ((mpz_ptr));
+DLL_IMPORT void mpz_clrbit _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT int mpz_cmp _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int _mpz_cmp_si _PROTO ((mpz_srcptr, signed long int));
+DLL_IMPORT int _mpz_cmp_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT int mpz_cmpabs _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_cmpabs_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_com _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_divexact _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_dump _PROTO ((mpz_srcptr));
+DLL_IMPORT void mpz_fac_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT void mpz_fdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_fdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_fdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_fdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_fdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_fdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_fdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_fdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_fdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_fib_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT int mpz_fits_sint_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_slong_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_sshort_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_uint_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_ulong_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_ushort_p _PROTO ((mpz_srcptr));
+DLL_IMPORT void mpz_gcd _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_gcd_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_gcdext _PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT double mpz_get_d _PROTO ((mpz_srcptr));
+/* signed */ long int mpz_get_si _PROTO ((mpz_srcptr));
+DLL_IMPORT char *mpz_get_str _PROTO ((char *, int, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_get_ui _PROTO ((mpz_srcptr));
+DLL_IMPORT mp_limb_t mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t));
+DLL_IMPORT unsigned long int mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_init _PROTO ((mpz_ptr));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpz_inp_binary _PROTO ((mpz_ptr, FILE *));
+DLL_IMPORT size_t mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
+DLL_IMPORT size_t mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
+#endif
+DLL_IMPORT void mpz_init_set _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_init_set_d _PROTO ((mpz_ptr, double));
+DLL_IMPORT void mpz_init_set_si _PROTO ((mpz_ptr, signed long int));
+DLL_IMPORT int mpz_init_set_str _PROTO ((mpz_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpz_init_set_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT int mpz_invert _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_ior _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_jacobi _PROTO ((mpz_srcptr, mpz_srcptr));
+
+#define mpz_kronecker_si __gmpz_kronecker_si
+DLL_IMPORT int mpz_kronecker_si _PROTO ((mpz_srcptr, long));
+
+#define mpz_kronecker_ui __gmpz_kronecker_ui
+DLL_IMPORT int mpz_kronecker_ui _PROTO ((mpz_srcptr, unsigned long));
+
+#define mpz_si_kronecker __gmpz_si_kronecker
+DLL_IMPORT int mpz_si_kronecker _PROTO ((long, mpz_srcptr));
+
+#define mpz_ui_kronecker __gmpz_ui_kronecker
+DLL_IMPORT int mpz_ui_kronecker _PROTO ((unsigned long, mpz_srcptr));
+
+DLL_IMPORT void mpz_lcm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_legendre _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_mod _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_mul_si __gmpz_mul_si
+DLL_IMPORT void mpz_mul_si _PROTO ((mpz_ptr, mpz_srcptr, long int));
+
+#define mpz_mul_ui __gmpz_mul_ui
+DLL_IMPORT void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+DLL_IMPORT void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_nextprime _PROTO ((mpz_ptr, mpz_srcptr));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpz_out_binary _PROTO ((FILE *, mpz_srcptr));
+DLL_IMPORT size_t mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
+DLL_IMPORT size_t mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
+#endif
+DLL_IMPORT int mpz_perfect_power_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_perfect_square_p _PROTO ((mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_popcount _PROTO ((mpz_srcptr));
+DLL_IMPORT void mpz_pow_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_powm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_powm_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr));
+DLL_IMPORT int mpz_probab_prime_p _PROTO ((mpz_srcptr, int));
+DLL_IMPORT void mpz_random _PROTO ((mpz_ptr, mp_size_t));
+DLL_IMPORT void mpz_random2 _PROTO ((mpz_ptr, mp_size_t));
+DLL_IMPORT unsigned long int mpz_remove _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_root _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_rrandomb _PROTO ((mpz_ptr, gmp_randstate_t, unsigned long int));
+DLL_IMPORT unsigned long int mpz_scan0 _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_scan1 _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_set _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_set_d _PROTO ((mpz_ptr, double));
+DLL_IMPORT void mpz_set_f _PROTO ((mpz_ptr, mpf_srcptr));
+DLL_IMPORT void mpz_set_q _PROTO ((mpz_ptr, mpq_srcptr));
+DLL_IMPORT void mpz_set_si _PROTO ((mpz_ptr, signed long int));
+DLL_IMPORT int mpz_set_str _PROTO ((mpz_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpz_set_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT void mpz_setbit _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT size_t mpz_size _PROTO ((mpz_srcptr));
+DLL_IMPORT size_t mpz_sizeinbase _PROTO ((mpz_srcptr, int));
+DLL_IMPORT void mpz_sqrt _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_sqrtrem _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_sub _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_sub_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_swap _PROTO ((mpz_ptr, mpz_ptr));
+DLL_IMPORT void mpz_tdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_tdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_tdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_tdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_tdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_tdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_tdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_tdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_tdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT int mpz_tstbit _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_ui_pow_ui _PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+DLL_IMPORT void mpz_urandomb _PROTO ((mpz_t, gmp_randstate_t, unsigned long int));
+DLL_IMPORT void mpz_urandomm _PROTO ((mpz_t, gmp_randstate_t, mpz_t));
+DLL_IMPORT void mpz_xor _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+#if defined (__cplusplus)
+}
+#endif
+
+/**************** Rational (i.e. Q) routines. ****************/
+
+#define mpq_init __gmpq_init
+#define mpq_clear __gmpq_clear
+#define mpq_set __gmpq_set
+#define mpq_set_ui __gmpq_set_ui
+#define mpq_set_si __gmpq_set_si
+#define mpq_set_z __gmpq_set_z
+#define mpq_add __gmpq_add
+#define mpq_sub __gmpq_sub
+#define mpq_mul __gmpq_mul
+#define mpq_div __gmpq_div
+#define mpq_neg __gmpq_neg
+#define mpq_cmp __gmpq_cmp
+#define _mpq_cmp_ui __gmpq_cmp_ui
+#define mpq_equal __gmpq_equal
+#define mpq_inv __gmpq_inv
+#define mpq_set_num __gmpq_set_num
+#define mpq_set_den __gmpq_set_den
+#define mpq_get_num __gmpq_get_num
+#define mpq_get_den __gmpq_get_den
+#define mpq_get_d __gmpq_get_d
+#define mpq_set_d __gmpq_set_d
+#define mpq_canonicalize __gmpq_canonicalize
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+DLL_IMPORT void mpq_init _PROTO ((mpq_ptr));
+DLL_IMPORT void mpq_clear _PROTO ((mpq_ptr));
+DLL_IMPORT void mpq_set _PROTO ((mpq_ptr, mpq_srcptr));
+DLL_IMPORT void mpq_set_ui _PROTO ((mpq_ptr, unsigned long int, unsigned long int));
+DLL_IMPORT void mpq_set_si _PROTO ((mpq_ptr, signed long int, unsigned long int));
+DLL_IMPORT void mpq_set_z _PROTO ((mpq_ptr, mpz_srcptr));
+DLL_IMPORT void mpq_add _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_sub _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_mul _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_div _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_neg _PROTO ((mpq_ptr, mpq_srcptr));
+DLL_IMPORT int mpq_cmp _PROTO ((mpq_srcptr, mpq_srcptr));
+DLL_IMPORT int _mpq_cmp_ui _PROTO ((mpq_srcptr, unsigned long int, unsigned long int));
+DLL_IMPORT int mpq_equal _PROTO ((mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_inv _PROTO ((mpq_ptr, mpq_srcptr));
+DLL_IMPORT void mpq_set_num _PROTO ((mpq_ptr, mpz_srcptr));
+DLL_IMPORT void mpq_set_den _PROTO ((mpq_ptr, mpz_srcptr));
+DLL_IMPORT void mpq_get_num _PROTO ((mpz_ptr, mpq_srcptr));
+DLL_IMPORT void mpq_get_den _PROTO ((mpz_ptr, mpq_srcptr));
+DLL_IMPORT double mpq_get_d _PROTO ((mpq_srcptr));
+DLL_IMPORT void mpq_set_d _PROTO ((mpq_ptr, double));
+DLL_IMPORT void mpq_canonicalize _PROTO ((mpq_ptr));
+
+#define mpq_swap __gmpq_swap
+DLL_IMPORT void mpq_swap _PROTO ((mpq_ptr, mpq_ptr));
+
+#ifdef _GMP_H_HAVE_FILE
+#define mpq_out_str __gmpq_out_str
+DLL_IMPORT size_t mpq_out_str _PROTO ((FILE *, int, mpq_srcptr));
+#endif
+
+#if defined (__cplusplus)
+}
+#endif
+
+/**************** Float (i.e. F) routines. ****************/
+
+#define mpf_abs __gmpf_abs
+#define mpf_add __gmpf_add
+#define mpf_add_ui __gmpf_add_ui
+#define mpf_ceil __gmpf_ceil
+#define mpf_clear __gmpf_clear
+#define mpf_cmp __gmpf_cmp
+#define mpf_cmp_si __gmpf_cmp_si
+#define mpf_cmp_ui __gmpf_cmp_ui
+#define mpf_div __gmpf_div
+#define mpf_div_2exp __gmpf_div_2exp
+#define mpf_div_ui __gmpf_div_ui
+#define mpf_dump __gmpf_dump
+#define mpf_floor __gmpf_floor
+#define mpf_eq __gmpf_eq
+#define mpf_get_d __gmpf_get_d
+#define mpf_get_prec __gmpf_get_prec
+#define mpf_get_str __gmpf_get_str
+#define mpf_init __gmpf_init
+#define mpf_init2 __gmpf_init2
+#define mpf_inp_str __gmpf_inp_str
+#define mpf_init_set __gmpf_init_set
+#define mpf_init_set_d __gmpf_init_set_d
+#define mpf_init_set_si __gmpf_init_set_si
+#define mpf_init_set_str __gmpf_init_set_str
+#define mpf_init_set_ui __gmpf_init_set_ui
+#define mpf_mul __gmpf_mul
+#define mpf_mul_2exp __gmpf_mul_2exp
+#define mpf_mul_ui __gmpf_mul_ui
+#define mpf_neg __gmpf_neg
+#define mpf_out_str __gmpf_out_str
+#define mpf_pow_ui __gmpf_pow_ui
+#define mpf_random2 __gmpf_random2
+#define mpf_reldiff __gmpf_reldiff
+#define mpf_set __gmpf_set
+#define mpf_set_d __gmpf_set_d
+#define mpf_set_default_prec __gmpf_set_default_prec
+#define mpf_set_prec __gmpf_set_prec
+#define mpf_set_prec_raw __gmpf_set_prec_raw
+#define mpf_set_q __gmpf_set_q
+#define mpf_set_si __gmpf_set_si
+#define mpf_set_str __gmpf_set_str
+#define mpf_set_ui __gmpf_set_ui
+#define mpf_set_z __gmpf_set_z
+#define mpf_size __gmpf_size
+#define mpf_sqrt __gmpf_sqrt
+#define mpf_sqrt_ui __gmpf_sqrt_ui
+#define mpf_sub __gmpf_sub
+#define mpf_sub_ui __gmpf_sub_ui
+#define mpf_trunc __gmpf_trunc
+#define mpf_ui_div __gmpf_ui_div
+#define mpf_ui_sub __gmpf_ui_sub
+#define mpf_urandomb __gmpf_urandomb
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+DLL_IMPORT void mpf_abs _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_add _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_add_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_ceil _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_clear _PROTO ((mpf_ptr));
+DLL_IMPORT int mpf_cmp _PROTO ((mpf_srcptr, mpf_srcptr));
+DLL_IMPORT int mpf_cmp_si _PROTO ((mpf_srcptr, signed long int));
+DLL_IMPORT int mpf_cmp_ui _PROTO ((mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_div _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_div_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_div_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_dump _PROTO ((mpf_srcptr));
+DLL_IMPORT int mpf_eq _PROTO ((mpf_srcptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_floor _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT double mpf_get_d _PROTO ((mpf_srcptr));
+DLL_IMPORT unsigned long int mpf_get_prec _PROTO ((mpf_srcptr));
+char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
+DLL_IMPORT void mpf_init _PROTO ((mpf_ptr));
+DLL_IMPORT void mpf_init2 _PROTO ((mpf_ptr, unsigned long int));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
+#endif
+DLL_IMPORT void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_init_set_d _PROTO ((mpf_ptr, double));
+DLL_IMPORT void mpf_init_set_si _PROTO ((mpf_ptr, signed long int));
+DLL_IMPORT int mpf_init_set_str _PROTO ((mpf_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpf_init_set_ui _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpf_out_str _PROTO ((FILE *, int, size_t, mpf_srcptr));
+#endif
+DLL_IMPORT void mpf_pow_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_random2 _PROTO ((mpf_ptr, mp_size_t, mp_exp_t));
+DLL_IMPORT void mpf_reldiff _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_set _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_set_d _PROTO ((mpf_ptr, double));
+DLL_IMPORT void mpf_set_default_prec _PROTO ((unsigned long int));
+DLL_IMPORT void mpf_set_prec _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_set_prec_raw _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_set_q _PROTO ((mpf_ptr, mpq_srcptr));
+DLL_IMPORT void mpf_set_si _PROTO ((mpf_ptr, signed long int));
+DLL_IMPORT int mpf_set_str _PROTO ((mpf_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpf_set_ui _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_set_z _PROTO ((mpf_ptr, mpz_srcptr));
+DLL_IMPORT size_t mpf_size _PROTO ((mpf_srcptr));
+DLL_IMPORT void mpf_sqrt _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_sqrt_ui _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_sub _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_sub_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_trunc _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_ui_div _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+DLL_IMPORT void mpf_ui_sub _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+DLL_IMPORT void mpf_urandomb _PROTO ((mpf_t, gmp_randstate_t, unsigned long int));
+
+#define mpf_swap __gmpf_swap
+DLL_IMPORT void mpf_swap _PROTO ((mpf_ptr, mpf_ptr));
+
+#if defined (__cplusplus)
+}
+#endif
+/************ Low level positive-integer (i.e. N) routines. ************/
+
+/* This is ugly, but we need to make user calls reach the prefixed function. */
+#define mpn_add __MPN(add)
+#define mpn_add_1 __MPN(add_1)
+#define mpn_add_n __MPN(add_n)
+#define mpn_add_nc __MPN(add_nc)
+#define mpn_addmul_1 __MPN(addmul_1)
+#define mpn_addsub_n __MPN(addsub_n)
+#define mpn_addsub_nc __MPN(addsub_nc)
+/* #define mpn_and_n __MPN(and_n) */
+/* #define mpn_andn_n __MPN(andn_n) */
+#define mpn_bdivmod __MPN(bdivmod)
+#define mpn_cmp __MPN(cmp)
+/* #define mpn_com_n __MPN(com_n) */
+#define mpn_copyd __MPN(copyd)
+#define mpn_copyi __MPN(copyi)
+#define mpn_divrem __MPN(divrem)
+#define mpn_divrem_1 __MPN(divrem_1)
+#define mpn_divrem_2 __MPN(divrem_2)
+#define mpn_dump __MPN(dump)
+#define mpn_gcd __MPN(gcd)
+#define mpn_gcd_1 __MPN(gcd_1)
+#define mpn_gcdext __MPN(gcdext)
+#define mpn_get_str __MPN(get_str)
+#define mpn_hamdist __MPN(hamdist)
+#define mpn_invert_limb __MPN(invert_limb)
+/* #define mpn_ior_n __MPN(ior_n) */
+/* #define mpn_iorn_n __MPN(iorn_n) */
+/* #define mpn_kara_mul_n __MPN(kara_mul_n) internal */
+/* #define mpn_kara_sqr_n __MPN(kara_sqr_n) internal */
+#define mpn_lshift __MPN(lshift)
+#define mpn_lshiftc __MPN(lshiftc)
+#define mpn_mod_1 __MPN(mod_1)
+#define mpn_mul __MPN(mul)
+#define mpn_mul_1 __MPN(mul_1)
+#define mpn_mul_basecase __MPN(mul_basecase)
+#define mpn_mul_n __MPN(mul_n)
+#define mpn_perfect_square_p __MPN(perfect_square_p)
+#define mpn_popcount __MPN(popcount)
+#define mpn_preinv_mod_1 __MPN(preinv_mod_1)
+/* #define mpn_nand_n __MPN(nand_n) */
+/* #define mpn_nior_n __MPN(nior_n) */
+#define mpn_random __MPN(random)
+#define mpn_random2 __MPN(random2)
+#define mpn_rshift __MPN(rshift)
+#define mpn_rshiftc __MPN(rshiftc)
+#define mpn_scan0 __MPN(scan0)
+#define mpn_scan1 __MPN(scan1)
+#define mpn_set_str __MPN(set_str)
+#define mpn_sqr_basecase __MPN(sqr_basecase)
+#define mpn_sqr_n __MPN(sqr_n)
+#define mpn_sqrtrem __MPN(sqrtrem)
+#define mpn_sub __MPN(sub)
+#define mpn_sub_1 __MPN(sub_1)
+#define mpn_sub_n __MPN(sub_n)
+#define mpn_sub_nc __MPN(sub_nc)
+#define mpn_submul_1 __MPN(submul_1)
+/* #define mpn_toom3_mul_n __MPN(toom3_mul_n) internal */
+/* #define mpn_toom3_sqr_n __MPN(toom3_sqr_n) internal */
+/* #define mpn_xnor_n __MPN(xnor_n) */
+/* #define mpn_xor_n __MPN(xor_n) */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+DLL_IMPORT mp_limb_t mpn_add _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+DLL_IMPORT mp_limb_t mpn_add_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+DLL_IMPORT mp_limb_t mpn_add_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_add_nc _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_addmul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_addmul_1c __MPN(addmul_1c)
+DLL_IMPORT mp_limb_t mpn_addmul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_addsub_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_bdivmod _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, unsigned long int));
+DLL_IMPORT int mpn_cmp _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_divexact_by3(dst, src, size) mpn_divexact_by3c (dst, src, size, 0)
+
+#define mpn_divexact_by3c __MPN(divexact_by3c)
+DLL_IMPORT mp_limb_t mpn_divexact_by3c _PROTO ((mp_ptr dst, mp_srcptr src,
+ mp_size_t size, mp_limb_t carry));
+
+#define mpn_divmod_1(qp,np,nsize,dlimb) mpn_divrem_1 (qp,0,np,nsize,dlimb)
+
+DLL_IMPORT mp_limb_t mpn_divrem _PROTO((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+
+DLL_IMPORT mp_limb_t mpn_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_divrem_1c __MPN(divrem_1c)
+DLL_IMPORT mp_limb_t mpn_divrem_1c _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t,
+ mp_limb_t, mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_divrem_2 _PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+DLL_IMPORT void mpn_dump _PROTO ((mp_srcptr, mp_size_t));
+mp_size_t mpn_gcd _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_size_t mpn_gcdext _PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+DLL_IMPORT size_t mpn_get_str _PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
+DLL_IMPORT unsigned long int mpn_hamdist _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_jacobi_base __MPN(jacobi_base)
+DLL_IMPORT int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1));
+
+DLL_IMPORT mp_limb_t mpn_lshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+DLL_IMPORT mp_limb_t mpn_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_mod_1c __MPN(mod_1c)
+DLL_IMPORT mp_limb_t mpn_mod_1c _PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_mod_1_rshift __MPN(mod_1_rshift)
+DLL_IMPORT mp_limb_t mpn_mod_1_rshift _PROTO ((mp_srcptr, mp_size_t, unsigned,mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_mul _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_mul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_mul_1c __MPN(mul_1c)
+DLL_IMPORT mp_limb_t mpn_mul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+DLL_IMPORT void mpn_mul_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+DLL_IMPORT void mpn_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT int mpn_perfect_square_p _PROTO ((mp_srcptr, mp_size_t));
+DLL_IMPORT unsigned long int mpn_popcount _PROTO ((mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_preinv_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+DLL_IMPORT void mpn_random _PROTO ((mp_ptr, mp_size_t));
+DLL_IMPORT void mpn_random2 _PROTO ((mp_ptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_rshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+DLL_IMPORT unsigned long int mpn_scan0 _PROTO ((mp_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpn_scan1 _PROTO ((mp_srcptr, unsigned long int));
+mp_size_t mpn_set_str _PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int));
+DLL_IMPORT void mpn_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+DLL_IMPORT void mpn_sqr_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+mp_size_t mpn_sqrtrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_sub _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+DLL_IMPORT mp_limb_t mpn_sub_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+DLL_IMPORT mp_limb_t mpn_sub_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_sub_nc _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+DLL_IMPORT mp_limb_t mpn_submul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_submul_1c __MPN(submul_1c)
+DLL_IMPORT mp_limb_t mpn_submul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_tdiv_qr __MPN(tdiv_qr)
+DLL_IMPORT void mpn_tdiv_qr _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#if defined (__cplusplus)
+}
+#endif
+
+#define mpn_incr_u(p,incr) \
+ do { mp_limb_t __x; mp_ptr __p = p; \
+ __x = *__p + incr; \
+ *__p = __x; \
+ if (__x < incr) \
+ while (++(*(++__p)) == 0) \
+ ; \
+ } while (0)
+
+#define mpn_decr_u(p,incr) \
+ do { mp_limb_t __x; mp_ptr __p = p; \
+ __x = *__p; \
+ *__p = __x - incr; \
+ if (__x < incr) \
+ while ((*(++__p))-- == 0) \
+ ; \
+ } while (0)
+
+#if defined (__GNUC__) || defined (_FORCE_INLINES)
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_add_1 (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_limb_t s2_limb)
+#else
+mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+#endif
+{
+ register mp_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb = x + s2_limb;
+ *res_ptr++ = s2_limb;
+ if (s2_limb < x)
+ {
+ while (--s1_size != 0)
+ {
+ x = *s1_ptr++ + 1;
+ *res_ptr++ = x;
+ if (x != 0)
+ goto fin;
+ }
+
+ return 1;
+ }
+
+ fin:
+ if (res_ptr != s1_ptr)
+ {
+ mp_size_t i;
+ for (i = 0; i < s1_size - 1; i++)
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0;
+}
+
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_add (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_srcptr s2_ptr,
+ register mp_size_t s2_size)
+#else
+mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_srcptr s2_ptr;
+ register mp_size_t s2_size;
+#endif
+{
+ mp_limb_t cy_limb = 0;
+
+ if (s2_size != 0)
+ cy_limb = mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if (s1_size - s2_size != 0)
+ cy_limb = mpn_add_1 (res_ptr + s2_size,
+ s1_ptr + s2_size,
+ s1_size - s2_size,
+ cy_limb);
+ return cy_limb;
+}
+
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_sub_1 (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_limb_t s2_limb)
+#else
+mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+#endif
+{
+ register mp_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb = x - s2_limb;
+ *res_ptr++ = s2_limb;
+ if (s2_limb > x)
+ {
+ while (--s1_size != 0)
+ {
+ x = *s1_ptr++;
+ *res_ptr++ = x - 1;
+ if (x != 0)
+ goto fin;
+ }
+
+ return 1;
+ }
+
+ fin:
+ if (res_ptr != s1_ptr)
+ {
+ mp_size_t i;
+ for (i = 0; i < s1_size - 1; i++)
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0;
+}
+
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_sub (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_srcptr s2_ptr,
+ register mp_size_t s2_size)
+#else
+mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_srcptr s2_ptr;
+ register mp_size_t s2_size;
+#endif
+{
+ mp_limb_t cy_limb = 0;
+
+ if (s2_size != 0)
+ cy_limb = mpn_sub_n (res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if (s1_size - s2_size != 0)
+ cy_limb = mpn_sub_1 (res_ptr + s2_size,
+ s1_ptr + s2_size,
+ s1_size - s2_size,
+ cy_limb);
+ return cy_limb;
+}
+#endif /* __GNUC__ */
+
+/* Allow faster testing for negative, zero, and positive. */
+#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0)
+#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0)
+#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0)
+
+/* When using GCC, optimize certain common comparisons. */
+#if defined (__GNUC__)
+#define mpz_cmp_ui(Z,UI) \
+ (__builtin_constant_p (UI) && (UI) == 0 \
+ ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI))
+#define mpz_cmp_si(Z,SI) \
+ (__builtin_constant_p (SI) && (SI) == 0 ? mpz_sgn (Z) \
+ : __builtin_constant_p (SI) && (SI) > 0 \
+ ? _mpz_cmp_ui (Z, (unsigned long int) SI) \
+ : _mpz_cmp_si (Z,SI))
+#define mpq_cmp_ui(Q,NUI,DUI) \
+ (__builtin_constant_p (NUI) && (NUI) == 0 \
+ ? mpq_sgn (Q) : _mpq_cmp_ui (Q,NUI,DUI))
+#else
+#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI)
+#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI)
+#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI)
+#endif
+
+
+/* Using "&" rather than "&&" means these can come out branch-free. Every
+ mpz_t has at least one limb allocated, so fetching the low limb is always
+ allowed. */
+#define mpz_odd_p(z) ((int) ((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
+#define mpz_even_p(z) (! mpz_odd_p (z))
+
+
+/* Allow direct user access to numerator and denominator of a mpq_t object. */
+#define mpq_numref(Q) (&((Q)->_mp_num))
+#define mpq_denref(Q) (&((Q)->_mp_den))
+
+
+/* Compatibility with GMP 2 and earlier. */
+#define mpn_divmod(qp,np,nsize,dp,dsize) mpn_divrem (qp,0,np,nsize,dp,dsize)
+
+/* Compatibility with GMP 1. */
+#define mpz_mdiv mpz_fdiv_q
+#define mpz_mdivmod mpz_fdiv_qr
+#define mpz_mmod mpz_fdiv_r
+#define mpz_mdiv_ui mpz_fdiv_q_ui
+#define mpz_mdivmod_ui(q,r,n,d) \
+ ((r == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d))
+#define mpz_mmod_ui(r,n,d) \
+ ((r == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d))
+
+/* Useful synonyms, but not quite compatible with GMP 1. */
+#define mpz_div mpz_fdiv_q
+#define mpz_divmod mpz_fdiv_qr
+#define mpz_div_ui mpz_fdiv_q_ui
+#define mpz_divmod_ui mpz_fdiv_qr_ui
+#define mpz_mod_ui mpz_fdiv_r_ui
+#define mpz_div_2exp mpz_fdiv_q_2exp
+#define mpz_mod_2exp mpz_fdiv_r_2exp
+
+#define gmp_errno __gmp_errno
+extern int gmp_errno;
+
+enum
+{
+ GMP_ERROR_NONE = 0,
+ GMP_ERROR_UNSUPPORTED_ARGUMENT = 1,
+ GMP_ERROR_DIVISION_BY_ZERO = 2,
+ GMP_ERROR_SQRT_OF_NEGATIVE = 4,
+ GMP_ERROR_INVALID_ARGUMENT = 8,
+ GMP_ERROR_ALLOCATE = 16,
+ GMP_ERROR_BAD_STRING = 32,
+ GMP_ERROR_UNUSED_ERROR
+};
+
+/* Note: major version number is in mp.h too */
+#define __GNU_MP_VERSION 3
+#define __GNU_MP_VERSION_MINOR 1
+#define __GNU_MP_VERSION_PATCHLEVEL 1
+
+#define gmp_version __gmp_version
+extern __gmp_const char *gmp_version;
+
+#define __GMP_H__
+#endif /* __GMP_H__ */
diff --git a/rts/gmp/insert-dbl.c b/rts/gmp/insert-dbl.c
new file mode 100644
index 0000000000..dc88a56f62
--- /dev/null
+++ b/rts/gmp/insert-dbl.c
@@ -0,0 +1,98 @@
+/* __gmp_insert_double -- convert from array of mp_limb_t to double.
+
+Copyright (C) 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef XDEBUG
+#undef _GMP_IEEE_FLOATS
+#endif
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+double
+#if __STDC__
+__gmp_scale2 (double d, int exp)
+#else
+__gmp_scale2 (d, exp)
+ double d;
+ int exp;
+#endif
+{
+#if _GMP_IEEE_FLOATS
+ {
+#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
+ /* Work around alpha-specific bug in GCC 2.8.x. */
+ volatile
+#endif
+ union ieee_double_extract x;
+ x.d = d;
+ exp += x.s.exp;
+ x.s.exp = exp;
+ if (exp >= 2047)
+ {
+ /* Return +-infinity */
+ x.s.exp = 2047;
+ x.s.manl = x.s.manh = 0;
+ }
+ else if (exp < 1)
+ {
+ x.s.exp = 1; /* smallest exponent (biased) */
+ /* Divide result by 2 until we have scaled it to the right IEEE
+ denormalized number, but stop if it becomes zero. */
+ while (exp < 1 && x.d != 0)
+ {
+ x.d *= 0.5;
+ exp++;
+ }
+ }
+ return x.d;
+ }
+#else
+ {
+ double factor, r;
+
+ factor = 2.0;
+ if (exp < 0)
+ {
+ factor = 0.5;
+ exp = -exp;
+ }
+ r = d;
+ if (exp != 0)
+ {
+ if ((exp & 1) != 0)
+ r *= factor;
+ exp >>= 1;
+ while (exp != 0)
+ {
+ factor *= factor;
+ if ((exp & 1) != 0)
+ r *= factor;
+ exp >>= 1;
+ }
+ }
+ return r;
+ }
+#endif
+}
diff --git a/rts/gmp/install-sh b/rts/gmp/install-sh
new file mode 100644
index 0000000000..e9de23842d
--- /dev/null
+++ b/rts/gmp/install-sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5 (mit/util/scripts/install.sh).
+#
+# Copyright 1991 by the Massachusetts Institute of Technology
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of M.I.T. not be used in advertising or
+# publicity pertaining to distribution of the software without specific,
+# written prior permission. M.I.T. makes no representations about the
+# suitability of this software for any purpose. It is provided "as is"
+# without express or implied warranty.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch. It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+transformbasename=""
+transform_arg=""
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""
+
+while [ x"$1" != x ]; do
+ case $1 in
+ -c) instcmd="$cpprog"
+ shift
+ continue;;
+
+ -d) dir_arg=true
+ shift
+ continue;;
+
+ -m) chmodcmd="$chmodprog $2"
+ shift
+ shift
+ continue;;
+
+ -o) chowncmd="$chownprog $2"
+ shift
+ shift
+ continue;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift
+ shift
+ continue;;
+
+ -s) stripcmd="$stripprog"
+ shift
+ continue;;
+
+ -t=*) transformarg=`echo $1 | sed 's/-t=//'`
+ shift
+ continue;;
+
+ -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+ shift
+ continue;;
+
+ *) if [ x"$src" = x ]
+ then
+ src=$1
+ else
+ # this colon is to work around a 386BSD /bin/sh bug
+ :
+ dst=$1
+ fi
+ shift
+ continue;;
+ esac
+done
+
+if [ x"$src" = x ]
+then
+ echo "install: no input file specified"
+ exit 1
+else
+ true
+fi
+
+if [ x"$dir_arg" != x ]; then
+ dst=$src
+ src=""
+
+ if [ -d $dst ]; then
+ instcmd=:
+ chmodcmd=""
+ else
+ instcmd=mkdir
+ fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad
+# if $src (and thus $dsttmp) contains '*'.
+
+ if [ -f $src -o -d $src ]
+ then
+ true
+ else
+ echo "install: $src does not exist"
+ exit 1
+ fi
+
+ if [ x"$dst" = x ]
+ then
+ echo "install: no destination specified"
+ exit 1
+ else
+ true
+ fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+ if [ -d $dst ]
+ then
+ dst="$dst"/`basename $src`
+ else
+ true
+ fi
+fi
+
+## this sed command emulates the dirname command
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+# this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
+IFS="${oIFS}"
+
+pathcomp=''
+
+while [ $# -ne 0 ] ; do
+ pathcomp="${pathcomp}${1}"
+ shift
+
+ if [ ! -d "${pathcomp}" ] ;
+ then
+ $mkdirprog "${pathcomp}"
+ else
+ true
+ fi
+
+ pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+ $doit $instcmd $dst &&
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+ if [ x"$transformarg" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ dstfile=`basename $dst $transformbasename |
+ sed $transformarg`$transformbasename
+ fi
+
+# don't allow the sed command to completely eliminate the filename
+
+ if [ x"$dstfile" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ true
+ fi
+
+# Make a temp file name in the proper directory.
+
+ dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+ $doit $instcmd $src $dsttmp &&
+
+ trap "rm -f ${dsttmp}" 0 &&
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing. If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+ $doit $rmcmd -f $dstdir/$dstfile &&
+ $doit $mvcmd $dsttmp $dstdir/$dstfile
+
+fi &&
+
+
+exit 0
diff --git a/rts/gmp/longlong.h b/rts/gmp/longlong.h
new file mode 100644
index 0000000000..9a12755053
--- /dev/null
+++ b/rts/gmp/longlong.h
@@ -0,0 +1,1347 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this file; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+
+ UWtype -- An unsigned type, default type for operations (typically a "word")
+ UHWtype -- An unsigned type, at least half the size of UWtype.
+ UDWtype -- An unsigned type, at least twice as large a UWtype
+ W_TYPE_SIZE -- size in bits of UWtype
+
+ SItype, USItype -- Signed and unsigned 32 bit types.
+ DItype, UDItype -- Signed and unsigned 64 bit types.
+
+ On a 32 bit machine UWtype should typically be USItype;
+ on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+ that use this file takes place. */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+/* Define auxiliary asm macros.
+
+ 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
+ UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
+ word product in HIGH_PROD and LOW_PROD.
+
+ 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ UDWtype product. This is just a variant of umul_ppmm.
+
+ 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ denominator) divides a UDWtype, composed by the UWtype integers
+ HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
+ than DENOMINATOR for correct operation. If, in addition, the most
+ significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+ 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ denominator). Like udiv_qrnnd but the numbers are signed. The quotient
+ is rounded towards 0.
+
+ 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ msb to the first non-zero bit in the UWtype X. This is the number of
+ steps X needs to be shifted left to set the msb. Undefined for X == 0,
+ unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+ 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ from the least significant end.
+
+ 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
+ (i.e. carry out) is not stored anywhere, and is lost.
+
+ 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE
+ and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
+ and is lost.
+
+ If any of these macros are left undefined for a particular CPU,
+ C macros are used. */
+
+/* The CPUs come in alphabetical order below.
+
+ Please add support for more CPUs here, or improve the current support
+ for the CPUs below! */
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+#if defined (__GNUC__)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("umulh %r1,%2,%0" \
+ : "=r" (ph) \
+ : "%rJ" (m0), "rI" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 18
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { UDItype __di; \
+ __di = __MPN(invert_limb) (d); \
+ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
+ } while (0)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define UDIV_TIME 220
+long __MPN(count_leading_zeros) ();
+#define count_leading_zeros(count, x) \
+ ((count) = __MPN(count_leading_zeros) (x))
+#endif /* LONGLONG_STANDALONE */
+#else /* ! __GNUC__ */
+#include <machine/builtins.h>
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ (ph) = __UMULH (m0, m1); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#endif
+#endif /* __alpha */
+
+#if defined (__hppa) && W_TYPE_SIZE == 64
+/* We put the result pointer parameter last here, since it makes passing
+ of the other parameters more efficient. */
+#ifndef LONGLONG_STANDALONE
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ UDItype __p0; \
+ (wh) = __MPN(umul_ppmm) (u, v, &__p0); \
+ (wl) = __p0; \
+ } while (0)
+extern UDItype __MPN(umul_ppmm) _PROTO ((UDItype, UDItype, UDItype *));
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { UDItype __r; \
+ (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \
+ (r) = __r; \
+ } while (0)
+extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *));
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#endif /* LONGLONG_STANDALONE */
+#endif /* hppa */
+
+#if defined (__ia64) && W_TYPE_SIZE == 64
+#if defined (__GNUC__)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("xma.hu %0 = %1, %2, f0" \
+ : "=e" (ph) \
+ : "e" (m0), "e" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#endif
+#endif
+
+
+#if defined (__GNUC__) && !defined (NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+ understood by gcc1. Use cpp to avoid major code duplication. */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("multiplu %0,%1,%2" \
+ : "=r" (xl) \
+ : "r" (__m0), "r" (__m1)); \
+ __asm__ ("multmu %0,%1,%2" \
+ : "=r" (xh) \
+ : "r" (__m0), "r" (__m1)); \
+ } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("dividu %0,%3,%4" \
+ : "=r" (q), "=q" (r) \
+ : "1" (n1), "r" (n0), "r" (d))
+#define count_leading_zeros(count, x) \
+ __asm__ ("clz %0,%1" \
+ : "=r" (count) \
+ : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
+#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#define smul_ppmm(xh, xl, a, b) \
+ __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#define UMUL_TIME 5
+#else
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("%@ Inlined umul_ppmm\n" \
+ "mov %|r0, %2, lsr #16\n" \
+ "mov %|r2, %3, lsr #16\n" \
+ "bic %|r1, %2, %|r0, lsl #16\n" \
+ "bic %|r2, %3, %|r2, lsl #16\n" \
+ "mul %1, %|r1, %|r2\n" \
+ "mul %|r2, %|r0, %|r2\n" \
+ "mul %|r1, %0, %|r1\n" \
+ "mul %0, %|r0, %0\n" \
+ "adds %|r1, %|r2, %|r1\n" \
+ "addcs %0, %0, #65536\n" \
+ "adds %1, %1, %|r1, lsl #16\n" \
+ "adc %0, %0, %|r1, lsr #16" \
+ : "=&r" (xh), "=r" (xl) \
+ : "r" (a), "r" (b) \
+ : "r0", "r1", "r2")
+#define UMUL_TIME 20
+#endif
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+#if defined (__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__x.__ll) \
+ : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define smul_ppmm(w1, w0, u, v) \
+ ({union {DItype __ll; \
+ struct {SItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("mulwx %2,%0" \
+ : "=r" (__x.__ll) \
+ : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
+ __w; })
+#endif /* __clipper__ */
+
+/* Fujitsu vector computers. */
+#if defined (__uxp__) && W_TYPE_SIZE == 32
+#define umul_ppmm(ph, pl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
+ (ph) = __x.__i.__h; \
+ (pl) = __x.__i.__l; \
+ } while (0)
+#define smul_ppmm(ph, pl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
+ (ph) = __x.__i.__h; \
+ (pl) = __x.__i.__l; \
+ } while (0)
+#endif
+
+#if defined (__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add.w %5,%1\n\taddx %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+ __asm__ ("mulx %3,%0,%1" \
+ : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
+ : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("divx %4,%0,%1" \
+ : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
+ : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("bsch/1 %1,%0" \
+ : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
+#endif
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
+ (wh) = __x.__i.__h; \
+ (wl) = __x.__i.__l; \
+ } while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { USItype __r; \
+ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype));
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __tmp; \
+ __asm__ ( \
+ "ldi 2,%0\n" \
+ "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
+ "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
+ "ldo 16(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
+ "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
+ "ldo 8(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
+ "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
+ "ldo 4(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
+ "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
+ "ldo 2(%0),%0 ; Yes. Perform add.\n" \
+ "extru %1,30,1,%1 ; Extract bit 1.\n" \
+ "sub %0,%1,%0 ; Subtract it.\n" \
+ : "=r" (count), "=r" (__tmp) : "1" (x)); \
+ } while (0)
+#endif /* hppa */
+
+#if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#define smul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mr %0,%3" \
+ : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
+ : "%1" (m0), "r" (m1)); \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __x.__i.__h = n1; __x.__i.__l = n0; \
+ __asm__ ("dr %0,%2" \
+ : "=r" (__x.__ll) \
+ : "0" (__x.__ll), "r" (d)); \
+ (q) = __x.__i.__l; (r) = __x.__i.__h; \
+ } while (0)
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl %5,%1\n\tadcl %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mull %3" \
+ : "=a" (w0), "=d" (w1) \
+ : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divl %4" \
+ : "=a" (q), "=d" (r) \
+ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define count_trailing_zeros(count, x) \
+ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 10
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+#if defined (__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r,h,l,c) \
+ __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \
+ "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("emul %2,%1,%0" \
+ : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \
+ __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __nn; \
+ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+ __asm__ ("ediv %d,%n,%0" \
+ : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
+ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+#if defined (__i960mx) /* what is the proper symbol to test??? */
+#define rshift_rhlc(r,h,l,c) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __nn; \
+ __nn.__i.__h = (h); __nn.__i.__l = (l); \
+ __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+ }
+#endif /* i960mx */
+#endif /* i960 */
+
+#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
+ || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
+ || defined (__mc5307__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "d" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */
+#if defined (__mc68020__) || defined(mc68020) \
+ || defined (__mc68030__) || defined (mc68030) \
+ || defined (__mc68040__) || defined (mc68040) \
+ || defined (__mc68332__) || defined (mc68332) \
+ || defined (__NeXT__)
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulu%.l %3,%1:%0" \
+ : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
+ : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divu%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divs%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#else /* for other 68k family members use 16x16->32 multiplication */
+#define umul_ppmm(xh, xl, a, b) \
+ do { USItype __umul_tmp1, __umul_tmp2; \
+ __asm__ ("| Inlined umul_ppmm\n" \
+ "move%.l %5,%3\n" \
+ "move%.l %2,%0\n" \
+ "move%.w %3,%1\n" \
+ "swap %3\n" \
+ "swap %0\n" \
+ "mulu%.w %2,%1\n" \
+ "mulu%.w %3,%0\n" \
+ "mulu%.w %2,%3\n" \
+ "swap %2\n" \
+ "mulu%.w %5,%2\n" \
+ "add%.l %3,%2\n" \
+ "jcc 1f\n" \
+ "add%.l %#0x10000,%0\n" \
+"1: move%.l %2,%3\n" \
+ "clr%.w %2\n" \
+ "swap %2\n" \
+ "swap %3\n" \
+ "clr%.w %3\n" \
+ "add%.l %3,%1\n" \
+ "addx%.l %2,%0\n" \
+ "| End inlined umul_ppmm" \
+ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+ } while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+/* The '020, '030, '040 and '060 have bitfield insns. */
+#if defined (__mc68020__) || defined (mc68020) \
+ || defined (__mc68030__) || defined (mc68030) \
+ || defined (__mc68040__) || defined (mc68040) \
+ || defined (__mc68060__) || defined (mc68060) \
+ || defined (__NeXT__)
+#define count_leading_zeros(count, x) \
+ __asm__ ("bfffo %1{%b2:%b2},%0" \
+ : "=d" ((USItype) (count)) \
+ : "od" ((USItype) (x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+ (wh) = __x.__i.__h; \
+ (wl) = __x.__i.__l; \
+ } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x, __q; \
+ __x.__i.__h = (n1); __x.__i.__l = (n0); \
+ __asm__ ("divu.d %0,%1,%2" \
+ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+ (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
+ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips */
+
+#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \
+ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips */
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__x.__ll) \
+ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__w) \
+ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
+ __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __x.__i.__h = (n1); __x.__i.__l = (n0); \
+ __asm__ ("deid %2,%0" \
+ : "=g" (__x.__ll) \
+ : "0" (__x.__ll), "g" ((USItype)(d))); \
+ (r) = __x.__i.__l; (q) = __x.__i.__h; })
+#define count_trailing_zeros(count,x) \
+ do { \
+ __asm__ ("ffsd %2,%0" \
+ : "=r" ((USItype) (count)) \
+ : "0" ((USItype) 0), "r" ((USItype) (x))); \
+ } while (0)
+#endif /* __ns32000__ */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+ vendor compilers. */
+#if (defined (_ARCH_PPC) || defined (_ARCH_PWR) || defined (__powerpc__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else \
+ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
+ } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else \
+ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined (_ARCH_PPC) || defined (__powerpc__)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+ do { \
+ SItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+ __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#define UDIV_TIME 100
+#endif
+#endif /* 32-bit POWER architecture variants. */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+ vendor compilers. */
+#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else \
+ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
+ } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else \
+ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+ do { \
+ DItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define SMUL_TIME 14 /* ??? */
+#define UDIV_TIME 120 /* ??? */
+#endif /* 64-bit PowerPC. */
+
+#if defined (__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addw %5,%1\n\taddwc %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subw %5,%1\n\tsubwb %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("movw %1,%R0\n\tuemul %2,%0" \
+ : "=&r" (__x.__ll) \
+ : "g" ((USItype) (u)), "g" ((USItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#endif /* __pyr__ */
+
+#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("a %1,%5\n\tae %0,%3" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("s %1,%5\n\tse %0,%3" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "r" ((USItype)(bl)))
+#define smul_ppmm(ph, pl, m0, m1) \
+ __asm__ ( \
+ "s r2,r2\n" \
+ "mts r10,%2\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "cas %0,r2,r0\n" \
+ "mfs r10,%1" \
+ : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
+ : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
+ : "r2"); \
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+ do { \
+ if ((x) >= 0x10000) \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \
+ else \
+ { \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) : "r" ((USItype)(x))); \
+ (count) += 16; \
+ } \
+ } while (0)
+#endif /* RT/ROMP */
+
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
+ : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#if defined (__sparc_v9__) || defined (__sparcv9)
+/* Perhaps we should use floating-point operations here? */
+#if 0
+/* Triggers a bug making mpz/tests/t-gcd.c fail.
+ Perhaps we simply need explicitly zero-extend the inputs? */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
+ "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
+#else
+/* Use v8 umul until above bug is fixed. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#endif
+/* Use a plain v8 divide for v9. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
+#else
+#if defined (__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+ 2) the 'I' flag thinks of the range as a 13 bit signed interval,
+ while we want to match a 13 bit interval, sign extended to 32 bits,
+ but INTERPRETED AS UNSIGNED. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
+#define UDIV_TIME 25
+#else
+#define UDIV_TIME 60 /* SuperSPARC timing */
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide. It also has two additional
+ instructions scan (ffs from high bit) and divscc. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("! Inlined udiv_qrnnd\n" \
+ "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
+ "tst %%g0\n" \
+ "divscc %3,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%0\n" \
+ "rd %%y,%1\n" \
+ "bl,a 1f\n" \
+ "add %1,%4,%1\n" \
+"1: ! End of inline udiv_qrnnd" \
+ : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
+ : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+ __asm__ ("scan %1,0,%0" : "=r" (x) : "r" (count))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+ implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
+ undefined. */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* __sparc_v9__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("! Inlined umul_ppmm\n" \
+ "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
+ "sra %3,31,%%g2 ! Don't move this insn\n" \
+ "and %2,%%g2,%%g2 ! Don't move this insn\n" \
+ "andcc %%g0,0,%%g1 ! Don't move this insn\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,0,%%g1\n" \
+ "add %%g1,%%g2,%0\n" \
+ "rd %%y,%1" \
+ : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
+ : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39 /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { USItype __r; \
+ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype));
+#ifndef UDIV_TIME
+#define UDIV_TIME 140
+#endif
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define smul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("emul %1,%2,$0,%0" \
+ : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ union {DItype __ll; \
+ struct {SItype __l, __h;} __i; \
+ } __x; \
+ __x.__i.__h = n1; __x.__i.__l = n0; \
+ __asm__ ("ediv %3,%2,%0,%1" \
+ : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
+ } while (0)
+#endif /* __vax__ */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
+ : "%0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
+ "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
+ : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
+ "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {long int __ll; \
+ struct {unsigned int __h, __l;} __i; \
+ } __x; \
+ unsigned int __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mult %S0,%H3" \
+ : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
+ : "%1" (m0), "rQR" (m1)); \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ (xh) += ((((signed int) __m0 >> 15) & __m1) \
+ + (((signed int) __m1 >> 15) & __m0)); \
+ } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+
+#if !defined (umul_ppmm) && defined (__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+ { \
+ UDWtype __ll = __umulsidi3 (m0, m1); \
+ ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+ pl = (UWtype) __ll; \
+ }
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+ ({UWtype __hi, __lo; \
+ umul_ppmm (__hi, __lo, u, v); \
+ ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+
+/* Note the prototypes are under !define(umul_ppmm) etc too, since the HPPA
+ versions above are different and we don't want to conflict. */
+
+#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm
+#define mpn_umul_ppmm __MPN(umul_ppmm)
+extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ mp_limb_t __umul_ppmm__p0; \
+ (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \
+ (mp_limb_t) (u), (mp_limb_t) (v)); \
+ (wl) = __umul_ppmm__p0; \
+ } while (0)
+#endif
+
+#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd
+#define mpn_udiv_qrnnd __MPN(udiv_qrnnd)
+extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *,
+ mp_limb_t, mp_limb_t, mp_limb_t));
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ mp_limb_t __udiv_qrnnd__r; \
+ (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
+ (mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) d); \
+ (r) = __udiv_qrnnd__r; \
+ } while (0)
+#endif
+
+
+/* If this machine has no inline assembler, use C macros. */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) + (bl); \
+ (sh) = (ah) + (bh) + (__x < (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
+
+/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
+ smul_ppmm. */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __xm0 = (u), __xm1 = (v); \
+ smul_ppmm (__w1, w0, __xm0, __xm1); \
+ (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
+ + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
+ } while (0)
+#endif
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ UWtype __u = (u), __v = (v); \
+ \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ __vh = __ll_highpart (__v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart (__x0);/* this can't give carry */ \
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos. */ \
+ \
+ (w1) = __x3 + __ll_highpart (__x1); \
+ (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
+ } while (0)
+#endif
+
+/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
+ exist in one form or another. */
+#if !defined (smul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __xm0 = (u), __xm1 = (v); \
+ umul_ppmm (__w1, w0, __xm0, __xm1); \
+ (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
+ - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
+ } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+ do { \
+ UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+ __d1 = __ll_highpart (d); \
+ __d0 = __ll_lowpart (d); \
+ \
+ __q1 = (n1) / __d1; \
+ __r1 = (n1) - __q1 * __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart (n0); \
+ if (__r1 < __m) \
+ { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __q0 = __r1 / __d1; \
+ __r0 = __r1 - __q0 * __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
+ if (__r0 < __m) \
+ { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+ } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+ __udiv_w_sdiv (defined in libgcc or elsewhere). */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ do { \
+ UWtype __r; \
+ (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+ (r) = __r; \
+ } while (0)
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+extern
+#if __STDC__
+const
+#endif
+unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+ do { \
+ UWtype __xr = (x); \
+ UWtype __a; \
+ \
+ if (W_TYPE_SIZE <= 32) \
+ { \
+ __a = __xr < ((UWtype) 1 << 2*__BITS4) \
+ ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+ : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\
+ } \
+ else \
+ { \
+ for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+ if (((__xr >> __a) & 0xff) != 0) \
+ break; \
+ } \
+ \
+ (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+ } while (0)
+/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros. The latter might be
+ defined in asm, but if it is not, the C version above is good enough. */
+#define count_trailing_zeros(count, x) \
+ do { \
+ UWtype __ctz_x = (x); \
+ UWtype __ctz_c; \
+ count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
+ (count) = W_TYPE_SIZE - 1 - __ctz_c; \
+ } while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
+
+/* Give defaults for UMUL_TIME and UDIV_TIME. */
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* count_trailing_zeros is often on the slow side, so make that the default */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 15 /* cycles */
+#endif
+
+
diff --git a/rts/gmp/ltconfig b/rts/gmp/ltconfig
new file mode 100644
index 0000000000..6d8cf33e8f
--- /dev/null
+++ b/rts/gmp/ltconfig
@@ -0,0 +1,3109 @@
+#! /bin/sh
+
+# ltconfig - Create a system-specific libtool.
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# A lot of this script is taken from autoconf-2.10.
+
+# Check that we are running under the correct shell.
+SHELL=${CONFIG_SHELL-/bin/sh}
+echo=echo
+if test "X$1" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+elif test "X$1" = X--fallback-echo; then
+ # Avoid inline document here, it may be left over
+ :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+ # Yippee, $echo works!
+ :
+else
+ # Restart under the correct shell.
+ exec "$SHELL" "$0" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+ # used as fallback echo
+ shift
+ cat <<EOF
+$*
+EOF
+ exit 0
+fi
+
+# Find the correct PATH separator. Usually this is `:', but
+# DJGPP uses `;' like DOS.
+if test "X${PATH_SEPARATOR+set}" != Xset; then
+ UNAME=${UNAME-`uname 2>/dev/null`}
+ case X$UNAME in
+ *-DOS) PATH_SEPARATOR=';' ;;
+ *) PATH_SEPARATOR=':' ;;
+ esac
+fi
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+if test "X${echo_test_string+set}" != Xset; then
+ # find a string as large as possible, as long as the shell can cope with it
+ for cmd in 'sed 50q "$0"' 'sed 20q "$0"' 'sed 10q "$0"' 'sed 2q "$0"' 'echo test'; do
+ # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ...
+ if (echo_test_string="`eval $cmd`") 2>/dev/null &&
+ echo_test_string="`eval $cmd`" &&
+ (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null; then
+ break
+ fi
+ done
+fi
+
+if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ :
+else
+ # The Solaris, AIX, and Digital Unix default echo programs unquote
+ # backslashes. This makes it impossible to quote backslashes using
+ # echo "$something" | sed 's/\\/\\\\/g'
+ #
+ # So, first we look for a working echo in the user's PATH.
+
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}"
+ for dir in $PATH /usr/ucb; do
+ if (test -f $dir/echo || test -f $dir/echo$ac_exeext) &&
+ test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ echo="$dir/echo"
+ break
+ fi
+ done
+ IFS="$save_ifs"
+
+ if test "X$echo" = Xecho; then
+ # We didn't find a better echo, so look for alternatives.
+ if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ # This shell has a builtin print -r that does the trick.
+ echo='print -r'
+ elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) &&
+ test "X$CONFIG_SHELL" != X/bin/ksh; then
+ # If we have ksh, try running ltconfig again with it.
+ ORIGINAL_CONFIG_SHELL="${CONFIG_SHELL-/bin/sh}"
+ export ORIGINAL_CONFIG_SHELL
+ CONFIG_SHELL=/bin/ksh
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" --no-reexec ${1+"$@"}
+ else
+ # Try using printf.
+ echo='printf "%s\n"'
+ if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ # Cool, printf works
+ :
+ elif echo_testing_string=`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null` &&
+ test "X$echo_testing_string" = 'X\t' &&
+ echo_testing_string=`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ CONFIG_SHELL="$ORIGINAL_CONFIG_SHELL"
+ export CONFIG_SHELL
+ SHELL="$CONFIG_SHELL"
+ export SHELL
+ echo="$CONFIG_SHELL $0 --fallback-echo"
+ elif echo_testing_string=`("$CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null` &&
+ test "X$echo_testing_string" = 'X\t' &&
+ echo_testing_string=`("$CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ echo="$CONFIG_SHELL $0 --fallback-echo"
+ else
+ # maybe with a smaller string...
+ prev=:
+
+ for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do
+ if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null; then
+ break
+ fi
+ prev="$cmd"
+ done
+
+ if test "$prev" != 'sed 50q "$0"'; then
+ echo_test_string=`eval $prev`
+ export echo_test_string
+ exec "${ORIGINAL_CONFIG_SHELL}" "$0" ${1+"$@"}
+ else
+ # Oops. We lost completely, so just stick with echo.
+ echo=echo
+ fi
+ fi
+ fi
+ fi
+fi
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e s/^X//'
+sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# The name of this program.
+progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'`
+
+# Constants:
+PROGRAM=ltconfig
+PACKAGE=libtool
+VERSION=1.3c
+TIMESTAMP=" (1.696 2000/03/14 20:22:42)"
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+rm="rm -f"
+
+help="Try \`$progname --help' for more information."
+
+# Global variables:
+default_ofile=libtool
+can_build_shared=yes
+enable_shared=yes
+# All known linkers require a `.a' archive for static linking (except M$VC,
+# which needs '.lib').
+enable_static=yes
+enable_fast_install=yes
+enable_dlopen=unknown
+enable_win32_dll=no
+pic_mode=default
+ltmain=
+silent=
+srcdir=
+ac_config_guess=
+ac_config_sub=
+host=
+build=NONE
+nonopt=NONE
+ofile="$default_ofile"
+verify_host=yes
+with_gcc=no
+with_gnu_ld=no
+need_locks=yes
+ac_ext=c
+libext=a
+cache_file=
+
+old_AR="$AR"
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+old_CPPFLAGS="$CPPFLAGS"
+old_LDFLAGS="$LDFLAGS"
+old_LIBS="$LIBS"
+old_MAGIC="$MAGIC"
+old_LD="$LD"
+old_LN_S="$LN_S"
+old_NM="$NM"
+old_RANLIB="$RANLIB"
+old_STRIP="$STRIP"
+old_AS="$AS"
+old_DLLTOOL="$DLLTOOL"
+old_OBJDUMP="$OBJDUMP"
+old_OBJEXT="$OBJEXT"
+old_EXEEXT="$EXEEXT"
+old_reload_Flag="$reload_flag"
+old_deplibs_check_method="$deplibs_check_method"
+old_file_magic_cmd="$file_magic_cmd"
+
+# Parse the command line options.
+args=
+prev=
+for option
+do
+ case "$option" in
+ -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) optarg= ;;
+ esac
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ eval "$prev=\$option"
+ prev=
+ continue
+ fi
+
+ case "$option" in
+ --help) cat <<EOM
+Usage: $progname [OPTION]... LTMAIN [HOST]
+
+Generate a system-specific libtool script.
+
+ --build configure for building on BUILD [BUILD=HOST]
+ --debug enable verbose shell tracing
+ --disable-shared do not build shared libraries
+ --disable-static do not build static libraries
+ --disable-fast-install do not optimize for fast installation
+ --enable-dlopen enable dlopen support
+ --enable-win32-dll enable building dlls on win32 hosts
+ --help display this help and exit
+ --no-verify do not verify that HOST is a valid host type
+-o, --output=FILE specify the output file [default=$default_ofile]
+ --quiet same as \`--silent'
+ --silent do not print informational messages
+ --srcdir=DIR find \`config.guess' in DIR
+ --version output version information and exit
+ --with-gcc assume that the GNU C compiler will be used
+ --with-gnu-ld assume that the C compiler uses the GNU linker
+ --prefer-pic try to use only PIC objects
+ --prefer-non-pic try to use only non-PIC objects
+ --disable-lock disable file locking
+ --cache-file=FILE configure cache file
+
+LTMAIN is the \`ltmain.sh' shell script fragment or \`ltmain.c' program
+that provides basic libtool functionality.
+
+HOST is the canonical host system name [default=guessed].
+EOM
+ exit 0
+ ;;
+
+ --build) prev=build ;;
+ --build=*) build="$optarg" ;;
+
+ --debug)
+ echo "$progname: enabling shell trace mode"
+ set -x
+ ;;
+
+ --disable-shared) enable_shared=no ;;
+
+ --disable-static) enable_static=no ;;
+
+ --disable-fast-install) enable_fast_install=no ;;
+
+ --enable-dlopen) enable_dlopen=yes ;;
+
+ --enable-win32-dll) enable_win32_dll=yes ;;
+
+ --quiet | --silent) silent=yes ;;
+
+ --srcdir) prev=srcdir ;;
+ --srcdir=*) srcdir="$optarg" ;;
+
+ --no-verify) verify_host=no ;;
+
+ --output | -o) prev=ofile ;;
+ --output=*) ofile="$optarg" ;;
+
+ --version) echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"; exit 0 ;;
+
+ --with-gcc) with_gcc=yes ;;
+ --with-gnu-ld) with_gnu_ld=yes ;;
+
+ --prefer-pic) pic_mode=yes ;;
+ --prefer-non-pic) pic_mode=no ;;
+
+ --disable-lock) need_locks=no ;;
+
+ --cache-file=*) cache_file="$optarg" ;;
+
+ -*)
+ echo "$progname: unrecognized option \`$option'" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ ;;
+
+ *)
+ if test -z "$ltmain"; then
+ ltmain="$option"
+ elif test -z "$host"; then
+# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1
+# if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then
+# echo "$progname: warning \`$option' is not a valid host type" 1>&2
+# fi
+ host="$option"
+ else
+ echo "$progname: too many arguments" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi ;;
+ esac
+done
+
+if test -z "$ltmain"; then
+ echo "$progname: you must specify a LTMAIN file" 1>&2
+ echo "$help" 1>&2
+ exit 1
+fi
+
+if test ! -f "$ltmain"; then
+ echo "$progname: \`$ltmain' does not exist" 1>&2
+ echo "$help" 1>&2
+ exit 1
+fi
+
+# Quote any args containing shell metacharacters.
+ltconfig_args=
+for arg
+do
+ case "$arg" in
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*)
+ ltconfig_args="$ltconfig_args '$arg'" ;;
+ *) ltconfig_args="$ltconfig_args $arg" ;;
+ esac
+done
+
+# A relevant subset of AC_INIT.
+
+# File descriptor usage:
+# 0 standard input
+# 1 file creation
+# 2 errors and warnings
+# 3 some systems may open it to /dev/tty
+# 4 used on the Kubota Titan
+# 5 compiler messages saved in config.log
+# 6 checking for... messages and results
+if test "$silent" = yes; then
+ exec 6>/dev/null
+else
+ exec 6>&1
+fi
+exec 5>>./config.log
+
+# NLS nuisances.
+# Only set LANG and LC_ALL to C if already set.
+# These must not be set unconditionally because not all systems understand
+# e.g. LANG=C (notably SCO).
+if test "X${LC_ALL+set}" = Xset; then LC_ALL=C; export LC_ALL; fi
+if test "X${LANG+set}" = Xset; then LANG=C; export LANG; fi
+
+if test -n "$cache_file" && test -r "$cache_file"; then
+ echo "loading cache $cache_file within ltconfig"
+ . $cache_file
+fi
+
+if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
+ # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
+ if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
+ ac_n= ac_c='
+' ac_t=' '
+ else
+ ac_n=-n ac_c= ac_t=
+ fi
+else
+ ac_n= ac_c='\c' ac_t=
+fi
+
+if test -z "$srcdir"; then
+ # Assume the source directory is the same one as the path to LTMAIN.
+ srcdir=`$echo "X$ltmain" | $Xsed -e 's%/[^/]*$%%'`
+ test "$srcdir" = "$ltmain" && srcdir=.
+fi
+
+trap "$rm conftest*; exit 1" 1 2 15
+if test "$verify_host" = yes; then
+ # Check for config.guess and config.sub.
+ ac_aux_dir=
+ for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/config.guess; then
+ ac_aux_dir=$ac_dir
+ break
+ fi
+ done
+ if test -z "$ac_aux_dir"; then
+ echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi
+ ac_config_guess=$ac_aux_dir/config.guess
+ ac_config_sub=$ac_aux_dir/config.sub
+
+ # Make sure we can run config.sub.
+ if $SHELL $ac_config_sub sun4 >/dev/null 2>&1; then :
+ else
+ echo "$progname: cannot run $ac_config_sub" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi
+
+ echo $ac_n "checking host system type""... $ac_c" 1>&6
+
+ host_alias=$host
+ case "$host_alias" in
+ "")
+ if host_alias=`$SHELL $ac_config_guess`; then :
+ else
+ echo "$progname: cannot guess host type; you must specify one" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi ;;
+ esac
+ host=`$SHELL $ac_config_sub $host_alias`
+ echo "$ac_t$host" 1>&6
+
+ # Make sure the host verified.
+ test -z "$host" && exit 1
+
+ # Check for the build system type
+ echo $ac_n "checking build system type... $ac_c" 1>&6
+
+ build_alias=$build
+ case "$build_alias" in
+ NONE)
+ case $nonopt in
+ NONE) build_alias=$host_alias ;;
+ *) build_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ build=`$SHELL $ac_config_sub $build_alias`
+ build_cpu=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ build_vendor=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ build_os=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+ echo "$ac_t""$build" 1>&6
+
+elif test -z "$host"; then
+ echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2
+ echo "$help" 1>&2
+ exit 1
+else
+ host_alias=$host
+ build_alias=$host_alias
+ build=$host
+fi
+
+if test x"$host" != x"$build"; then
+ ac_tool_prefix=${host_alias}-
+else
+ ac_tool_prefix=
+fi
+
+host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
+case "$host_os" in
+linux-gnu*) ;;
+linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
+esac
+
+case "$host_os" in
+aix3*)
+ # AIX sometimes has problems with the GCC collect2 program. For some
+ # reason, if we set the COLLECT_NAMES environment variable, the problems
+ # vanish in a puff of smoke.
+ if test "X${COLLECT_NAMES+set}" != Xset; then
+ COLLECT_NAMES=
+ export COLLECT_NAMES
+ fi
+ ;;
+esac
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR cru $oldlib$oldobjs$old_deplibs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+# Set sane defaults for various variables
+test -z "$AR" && AR=ar
+test -z "$AS" && AS=as
+test -z "$CC" && CC=cc
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+test -z "$MAGIC" && MAGIC=file
+test -z "$LD" && LD=ld
+test -z "$LN_S" && LN_S="ln -s"
+test -z "$NM" && NM=nm
+test -z "$OBJDUMP" && OBJDUMP=objdump
+test -z "$RANLIB" && RANLIB=:
+test -z "$STRIP" && STRIP=:
+test -z "$objext" && objext=o
+
+echo $ac_n "checking for objdir... $ac_c" 1>&6
+rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+ objdir=.libs
+else
+ # MS-DOS does not allow filenames that begin with a dot.
+ objdir=_libs
+fi
+rmdir .libs 2>/dev/null
+echo "$ac_t$objdir" 1>&6
+
+# Allow CC to be a program name with arguments.
+set dummy $CC
+compiler="$2"
+
+# We assume here that the value for ac_cv_prog_cc_pic will not be cached
+# in isolation, and that seeing it set (from the cache) indicates that
+# the associated values are set (in the cache) correctly too.
+echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6
+echo "$progname:563:checking for $compiler option to produce PIC" 1>&5
+if test "X${ac_cv_prog_cc_pic+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_cv_prog_cc_pic=
+ ac_cv_prog_cc_shlib=
+ ac_cv_prog_cc_wl=
+ ac_cv_prog_cc_static=
+ ac_cv_prog_cc_no_builtin=
+ ac_cv_prog_cc_can_build_shared=$can_build_shared
+
+ if test "$with_gcc" = yes; then
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static='-static'
+
+ case "$host_os" in
+ beos* | irix5* | irix6* | osf3* | osf4* | osf5*)
+ # PIC is the default for these OSes.
+ ;;
+ aix*)
+ # Below there is a dirty hack to force normal static linking with -ldl
+ # The problem is because libdl dynamically linked with both libc and
+ # libC (AIX C++ library), which obviously doesn't included in libraries
+ # list by gcc. This cause undefined symbols with -static flags.
+ # This hack allows C programs to be linked with "-static -ldl", but
+ # we not sure about C++ programs.
+ ac_cv_prog_cc_static="$ac_cv_prog_cc_static ${ac_cv_prog_cc_wl}-lC"
+ ;;
+ cygwin* | mingw* | os2*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ ac_cv_prog_cc_pic='-DDLL_EXPORT'
+ ;;
+ amigaos*)
+ # FIXME: we need at least 68020 code to build shared libraries, but
+ # adding the `-m68020' flag to GCC prevents building anything better,
+ # like `-m68040'.
+ ac_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4'
+ ;;
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ ac_cv_prog_cc_pic=-Kconform_pic
+ fi
+ ;;
+ *)
+ ac_cv_prog_cc_pic='-fPIC'
+ ;;
+ esac
+ else
+ # PORTME Check for PIC flags for the system compiler.
+ case "$host_os" in
+ aix3* | aix4*)
+ # All AIX code is PIC.
+ ac_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp'
+ ;;
+
+ hpux9* | hpux10* | hpux11*)
+ # Is there a better ac_cv_prog_cc_static that works with the bundled CC?
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static="${ac_cv_prog_cc_wl}-a ${ac_cv_prog_cc_wl}archive"
+ ac_cv_prog_cc_pic='+Z'
+ ;;
+
+ irix5* | irix6*)
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static='-non_shared'
+ # PIC (with -KPIC) is the default.
+ ;;
+
+ cygwin* | mingw* | os2*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ ac_cv_prog_cc_pic='-DDLL_EXPORT'
+ ;;
+
+ osf3* | osf4* | osf5*)
+ # All OSF/1 code is PIC.
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static='-non_shared'
+ ;;
+
+ sco3.2v5*)
+ ac_cv_prog_cc_pic='-Kpic'
+ ac_cv_prog_cc_static='-dn'
+ ac_cv_prog_cc_shlib='-belf'
+ ;;
+
+ solaris*)
+ ac_cv_prog_cc_pic='-KPIC'
+ ac_cv_prog_cc_static='-Bstatic'
+ ac_cv_prog_cc_wl='-Wl,'
+ ;;
+
+ sunos4*)
+ ac_cv_prog_cc_pic='-PIC'
+ ac_cv_prog_cc_static='-Bstatic'
+ ac_cv_prog_cc_wl='-Qoption ld '
+ ;;
+
+ sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ ac_cv_prog_cc_pic='-KPIC'
+ ac_cv_prog_cc_static='-Bstatic'
+ ac_cv_prog_cc_wl='-Wl,'
+ ;;
+
+ uts4*)
+ ac_cv_prog_cc_pic='-pic'
+ ac_cv_prog_cc_static='-Bstatic'
+ ;;
+
+ sysv4*MP*)
+ if test -d /usr/nec ;then
+ ac_cv_prog_cc_pic='-Kconform_pic'
+ ac_cv_prog_cc_static='-Bstatic'
+ fi
+ ;;
+
+ *)
+ ac_cv_prog_cc_can_build_shared=no
+ ;;
+ esac
+ fi
+fi
+if test -z "$ac_cv_prog_cc_pic"; then
+ echo "$ac_t"none 1>&6
+else
+ echo "$ac_t""$ac_cv_prog_cc_pic" 1>&6
+
+ # Check to make sure the pic_flag actually works.
+ echo $ac_n "checking if $compiler PIC flag $ac_cv_prog_cc_pic works... $ac_c" 1>&6
+ echo "$progname:693:checking that $compiler PIC flag $ac_cv_prog_cc_pic works." 1>&5
+ if test "X${ac_cv_prog_cc_pic_works+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+ else
+ ac_cv_prog_cc_pic_works=yes
+ $rm conftest*
+ echo "int some_variable = 0;" > conftest.c
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS $ac_cv_prog_cc_pic -DPIC"
+ if { (eval echo $progname:702: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.$objext; then
+ # Append any warnings to the config.log.
+ cat conftest.err 1>&5
+
+ case "$host_os" in
+ hpux9* | hpux10* | hpux11*)
+ # On HP-UX, both CC and GCC only warn that PIC is supported... then
+ # they create non-PIC objects. So, if there were any warnings, we
+ # assume that PIC is not supported.
+ if test -s conftest.err; then
+ ac_cv_prog_cc_pic_works=no
+ ac_cv_prog_cc_can_build_shared=no
+ ac_cv_prog_cc_pic=
+ else
+ ac_cv_prog_cc_pic_works=yes
+ ac_cv_prog_cc_pic=" $ac_cv_prog_cc_pic"
+ fi
+ ;;
+ *)
+ ac_cv_prog_cc_pic_works=yes
+ ac_cv_prog_cc_pic=" $ac_cv_prog_cc_pic"
+ ;;
+ esac
+ else
+ # Append any errors to the config.log.
+ cat conftest.err 1>&5
+ ac_cv_prog_cc_pic_works=no
+ ac_cv_prog_cc_can_build_shared=no
+ ac_cv_prog_cc_pic=
+ fi
+ CFLAGS="$save_CFLAGS"
+ $rm conftest*
+ fi
+ # Belt *and* braces to stop my trousers falling down:
+ if test "X$ac_cv_prog_cc_pic_works" = Xno; then
+ ac_cv_prog_cc_pic=
+ ac_cv_prog_cc_can_build_shared=no
+ fi
+ echo "$ac_t""$ac_cv_prog_cc_pic_works" 1>&6
+fi
+
+# Check for any special shared library compilation flags.
+if test -n "$ac_cv_prog_cc_shlib"; then
+ echo "$progname: warning: \`$CC' requires \`$ac_cv_prog_cc_shlib' to build shared libraries" 1>&2
+ if echo "$old_CC $old_CFLAGS " | egrep -e "[ ]$ac_cv_prog_cc_shlib[ ]" >/dev/null; then :
+ else
+ echo "$progname: add \`$ac_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure" 1>&2
+ ac_cv_prog_cc_can_build_shared=no
+ fi
+fi
+
+echo $ac_n "checking if $compiler static flag $ac_cv_prog_cc_static works... $ac_c" 1>&6
+echo "$progname:754: checking if $compiler static flag $ac_cv_prog_cc_static works" >&5
+if test "X${ac_cv_prog_cc_static_works+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ $rm conftest*
+ echo 'main(){return(0);}' > conftest.c
+ save_LDFLAGS="$LDFLAGS"
+ LDFLAGS="$LDFLAGS $ac_cv_prog_cc_static"
+ if { (eval echo $progname:762: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+ ac_cv_prog_cc_static_works=yes
+ else
+ ac_cv_prog_cc_static_works=no
+ ac_cv_prog_cc_static=
+ fi
+ LDFLAGS="$save_LDFLAGS"
+ $rm conftest*
+fi
+# Belt *and* braces to stop my trousers falling down:
+if test "X$ac_cv_prog_cc_static_works" = Xno; then
+ ac_cv_prog_cc_static=
+fi
+echo "$ac_t""$ac_cv_prog_cc_static_works" 1>&6
+pic_flag="$ac_cv_prog_cc_pic"
+special_shlib_compile_flags="$ac_cv_prog_cc_shlib"
+wl="$ac_cv_prog_cc_wl"
+link_static_flag="$ac_cv_prog_cc_static"
+no_builtin_flag="$ac_cv_prog_cc_no_builtin"
+can_build_shared="$ac_cv_prog_cc_can_build_shared"
+
+# Check to see if options -o and -c are simultaneously supported by compiler
+echo $ac_n "checking if $compiler supports -c -o file.o... $ac_c" 1>&6
+$rm -r conftest 2>/dev/null
+mkdir conftest
+cd conftest
+$rm conftest*
+echo "int some_variable = 0;" > conftest.c
+mkdir out
+# According to Tom Tromey, Ian Lance Taylor reported there are C compilers
+# that will create temporary files in the current directory regardless of
+# the output directory. Thus, making CWD read-only will cause this test
+# to fail, enabling locking or at least warning the user not to do parallel
+# builds.
+chmod -w .
+save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -o out/conftest2.o"
+echo "$progname:799: checking if $compiler supports -c -o file.o" >&5
+if { (eval echo $progname:800: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.o; then
+
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s out/conftest.err; then
+ echo "$ac_t"no 1>&6
+ compiler_c_o=no
+ else
+ echo "$ac_t"yes 1>&6
+ compiler_c_o=yes
+ fi
+else
+ # Append any errors to the config.log.
+ cat out/conftest.err 1>&5
+ compiler_c_o=no
+ echo "$ac_t"no 1>&6
+fi
+CFLAGS="$save_CFLAGS"
+chmod u+w .
+$rm conftest* out/*
+rmdir out
+cd ..
+rmdir conftest
+$rm -r conftest 2>/dev/null
+
+if test x"$compiler_c_o" = x"yes"; then
+ # Check to see if we can write to a .lo
+ echo $ac_n "checking if $compiler supports -c -o file.lo... $ac_c" 1>&6
+ $rm conftest*
+ echo "int some_variable = 0;" > conftest.c
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -c -o conftest.lo"
+ echo "$progname:832: checking if $compiler supports -c -o file.lo" >&5
+if { (eval echo $progname:833: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.lo; then
+
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s conftest.err; then
+ echo "$ac_t"no 1>&6
+ compiler_o_lo=no
+ else
+ echo "$ac_t"yes 1>&6
+ compiler_o_lo=yes
+ fi
+ else
+ # Append any errors to the config.log.
+ cat conftest.err 1>&5
+ compiler_o_lo=no
+ echo "$ac_t"no 1>&6
+ fi
+ CFLAGS="$save_CFLAGS"
+ $rm conftest*
+else
+ compiler_o_lo=no
+fi
+
+# Check to see if we can do hard links to lock some files if needed
+hard_links="nottested"
+if test "$compiler_c_o" = no && test "$need_locks" != no; then
+ # do not overwrite the value of need_locks provided by the user
+ echo $ac_n "checking if we can lock with hard links... $ac_c" 1>&6
+ hard_links=yes
+ $rm conftest*
+ ln conftest.a conftest.b 2>/dev/null && hard_links=no
+ touch conftest.a
+ ln conftest.a conftest.b 2>&5 || hard_links=no
+ ln conftest.a conftest.b 2>/dev/null && hard_links=no
+ echo "$ac_t$hard_links" 1>&6
+ $rm conftest*
+ if test "$hard_links" = no; then
+ echo "*** WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2
+ need_locks=warn
+ fi
+else
+ need_locks=no
+fi
+
+if test "$with_gcc" = yes; then
+ # Check to see if options -fno-rtti -fno-exceptions are supported by compiler
+ echo $ac_n "checking if $compiler supports -fno-rtti -fno-exceptions ... $ac_c" 1>&6
+ $rm conftest*
+ echo "int some_variable = 0;" > conftest.c
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.c"
+ echo "$progname:884: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
+ if { (eval echo $progname:885: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then
+
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s conftest.err; then
+ echo "$ac_t"no 1>&6
+ compiler_rtti_exceptions=no
+ else
+ echo "$ac_t"yes 1>&6
+ compiler_rtti_exceptions=yes
+ fi
+ else
+ # Append any errors to the config.log.
+ cat conftest.err 1>&5
+ compiler_rtti_exceptions=no
+ echo "$ac_t"no 1>&6
+ fi
+ CFLAGS="$save_CFLAGS"
+ $rm conftest*
+
+ if test "$compiler_rtti_exceptions" = "yes"; then
+ no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions'
+ else
+ no_builtin_flag=' -fno-builtin'
+ fi
+
+fi
+
+# See if the linker supports building shared libraries.
+echo $ac_n "checking whether the linker ($LD) supports shared libraries... $ac_c" 1>&6
+
+allow_undefined_flag=
+no_undefined_flag=
+need_lib_prefix=unknown
+need_version=unknown
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+archive_cmds=
+archive_expsym_cmds=
+old_archive_from_new_cmds=
+old_archive_from_expsyms_cmds=
+striplib=
+old_striplib=
+export_dynamic_flag_spec=
+whole_archive_flag_spec=
+thread_safe_flag_spec=
+hardcode_into_libs=no
+hardcode_libdir_flag_spec=
+hardcode_libdir_separator=
+hardcode_direct=no
+hardcode_minus_L=no
+hardcode_shlibpath_var=unsupported
+runpath_var=
+link_all_deplibs=unknown
+always_export_symbols=no
+export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols'
+# include_expsyms should be a list of space-separated symbols to be *always*
+# included in the symbol list
+include_expsyms=
+# exclude_expsyms can be an egrep regular expression of symbols to exclude
+# it will be wrapped by ` (' and `)$', so one must not match beginning or
+# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+# as well as any symbol that contains `d'.
+exclude_expsyms="_GLOBAL_OFFSET_TABLE_"
+# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+# platforms (ab)use it in PIC code, but their linkers get confused if
+# the symbol is explicitly referenced. Since portable code cannot
+# rely on this symbol name, it's probably fine to never include it in
+# preloaded symbol tables.
+extract_expsyms_cmds=
+
+case "$host_os" in
+cygwin* | mingw*)
+ # FIXME: the MSVC++ port hasn't been tested in a loooong time
+ # When not using gcc, we currently assume that we are using
+ # Microsoft Visual C++.
+ if test "$with_gcc" != yes; then
+ with_gnu_ld=no
+ fi
+ ;;
+
+esac
+
+ld_shlibs=yes
+if test "$with_gnu_ld" = yes; then
+ # If archive_cmds runs LD, not CC, wlarc should be empty
+ wlarc='${wl}'
+
+ # See if GNU ld supports shared libraries.
+ case "$host_os" in
+ aix3* | aix4*)
+ # On AIX, the GNU linker is very broken
+ ld_shlibs=no
+ cat <<EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.9.1, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support. If you
+*** really care for shared libraries, you may want to modify your PATH
+*** so that a non-GNU linker is found, and then restart.
+
+EOF
+ ;;
+
+ amigaos*)
+ archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_minus_L=yes
+
+ # Samuel A. Falvo II <kc5tja@dolphin.openprojects.net> reports
+ # that the semantics of dynamic libraries on AmigaOS, at least up
+ # to version 4, is to share data among multiple programs linked
+ # with the same dynamic library. Since this doesn't match the
+ # behavior of shared libraries on other platforms, we can use
+ # them.
+ ld_shlibs=no
+ ;;
+
+ beos*)
+ if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+ allow_undefined_flag=unsupported
+ # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+ # support --undefined. This deserves some investigation. FIXME
+ archive_cmds='$CC -nostart $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname -o $lib'
+ else
+ ld_shlibs=no
+ fi
+ ;;
+
+ cygwin* | mingw*)
+ # hardcode_libdir_flag_spec is actually meaningless, as there is
+ # no search path for DLLs.
+ hardcode_libdir_flag_spec='-L$libdir'
+ allow_undefined_flag=unsupported
+ always_export_symbols=yes
+
+ extract_expsyms_cmds='test -f $output_objdir/impgen.c || \
+ sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //; p; }" -e d < $0 > $output_objdir/impgen.c~
+ test -f $output_objdir/impgen.exe || (cd $output_objdir && \
+ if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \
+ else $CC -o impgen impgen.c ; fi)~
+ $output_objdir/impgen $dir/$soname > $output_objdir/$soname-def'
+
+ old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib'
+
+ # cygwin and mingw dlls have different entry points and sets of symbols
+ # to exclude.
+ # FIXME: what about values for MSVC?
+ dll_entry=__cygwin_dll_entry@12
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~
+ case "$host_os" in
+ mingw*)
+ # mingw values
+ dll_entry=_DllMainCRTStartup@12
+ dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~
+ ;;
+ esac
+
+ # mingw and cygwin differ, and it's simplest to just exclude the union
+ # of the two symbol sets.
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12
+
+ # recent cygwin and mingw systems supply a stub DllMain which the user
+ # can override, but on older systems we have to supply one (in ltdll.c)
+ if test "x$lt_cv_need_dllmain" = "xyes"; then
+ ltdll_obj='$output_objdir/$soname-ltdll.'"$objext "
+ ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $0 > $output_objdir/$soname-ltdll.c~
+ test -f $output_objdir/$soname-ltdll.$objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~'
+ else
+ ltdll_obj=
+ ltdll_cmds=
+ fi
+
+ # Extract the symbol export list from an `--export-all' def file,
+ # then regenerate the def file from the symbol export list, so that
+ # the compiled dll only exports the symbol export list.
+ # Be careful not to strip the DATA tag left be newer dlltools.
+ export_symbols_cmds="$ltdll_cmds"'
+ $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~
+ sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols'
+
+ # If DATA tags from a recent dlltool are present, honour them!
+ archive_expsym_cmds='echo EXPORTS > $output_objdir/$soname-def~
+ _lt_hint=1;
+ cat $export_symbols | while read symbol; do
+ set dummy \$symbol;
+ case \$# in
+ 2) echo " \$2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;;
+ *) echo " \$2 @ \$_lt_hint \$3 ; " >> $output_objdir/$soname-def;;
+ esac;
+ _lt_hint=`expr 1 + \$_lt_hint`;
+ done~
+ '"$ltdll_cmds"'
+ $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+ $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~
+ $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+ $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~
+ $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags'
+ ;;
+
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+ fi
+ ;;
+
+ solaris* | sysv5*)
+ if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then
+ ld_shlibs=no
+ cat <<EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems. Therefore, libtool
+*** is disabling shared libraries support. We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer. Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+EOF
+ elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+ archive_cmds='$CC -shared $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname -o $lib'
+ archive_expsym_cmds='$CC -shared $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ ld_shlibs=no
+ fi
+ ;;
+
+ sunos4*)
+ archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ wlarc=
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ *)
+ if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+ archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ ld_shlibs=no
+ fi
+ ;;
+ esac
+
+ if test "$ld_shlibs" = yes; then
+ runpath_var=LD_RUN_PATH
+ hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir'
+ export_dynamic_flag_spec='${wl}--export-dynamic'
+ case $host_os in
+ cygwin* | mingw*)
+ # dlltool doesn't understand --whole-archive et. al.
+ whole_archive_flag_spec=
+ ;;
+ *)
+ # ancient GNU ld didn't support --whole-archive et. al.
+ if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then
+ whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+ else
+ whole_archive_flag_spec=
+ fi
+ ;;
+ esac
+ fi
+else
+ # PORTME fill in a description of your system's linker (not GNU ld)
+ case "$host_os" in
+ aix3*)
+ allow_undefined_flag=unsupported
+ always_export_symbols=yes
+ archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR cru $lib $output_objdir/$soname'
+ # Note: this linker hardcodes the directories in LIBPATH if there
+ # are no directories specified by -L.
+ hardcode_minus_L=yes
+ if test "$with_gcc" = yes && test -z "$link_static_flag"; then
+ # Neither direct hardcoding nor static linking is supported with a
+ # broken collect2.
+ hardcode_direct=unsupported
+ fi
+ ;;
+
+ aix4*)
+ hardcode_libdir_flag_spec='${wl}-b ${wl}nolibpath ${wl}-b ${wl}libpath:$libdir:/usr/lib:/lib'
+ hardcode_libdir_separator=':'
+ if test "$with_gcc" = yes; then
+ collect2name=`${CC} -print-prog-name=collect2`
+ if test -f "$collect2name" && \
+ strings "$collect2name" | grep resolve_lib_name >/dev/null
+ then
+ # We have reworked collect2
+ hardcode_direct=yes
+ else
+ # We have old collect2
+ hardcode_direct=unsupported
+ # It fails to find uninstalled libraries when the uninstalled
+ # path is not listed in the libpath. Setting hardcode_minus_L
+ # to unsupported forces relinking
+ hardcode_minus_L=yes
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_libdir_separator=
+ fi
+ shared_flag='-shared'
+ else
+ shared_flag='${wl}-bM:SRE'
+ hardcode_direct=yes
+ fi
+ allow_undefined_flag=' ${wl}-berok'
+ archive_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${wl}-bexpall ${wl}-bnoentry${allow_undefined_flag}'
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${wl}-bE:$export_symbols ${wl}-bnoentry${allow_undefined_flag}'
+ case "$host_os" in aix4.[01]|aix4.[01].*)
+ # According to Greg Wooledge, -bexpall is only supported from AIX 4.2 on
+ always_export_symbols=yes ;;
+ esac
+ ;;
+
+ amigaos*)
+ archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_minus_L=yes
+ # see comment about different semantics on the GNU ld section
+ ld_shlibs=no
+ ;;
+
+ cygwin* | mingw*)
+ # When not using gcc, we currently assume that we are using
+ # Microsoft Visual C++.
+ # hardcode_libdir_flag_spec is actually meaningless, as there is
+ # no search path for DLLs.
+ hardcode_libdir_flag_spec=' '
+ allow_undefined_flag=unsupported
+ # Tell ltmain to make .lib files, not .a files.
+ libext=lib
+ # FIXME: Setting linknames here is a bad hack.
+ archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames='
+ # The linker will automatically build a .lib file if we build a DLL.
+ old_archive_from_new_cmds='true'
+ # FIXME: Should let the user specify the lib program.
+ old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs'
+ fix_srcfile_path='`cygpath -w $srcfile`'
+ ;;
+
+ freebsd1*)
+ ld_shlibs=no
+ ;;
+
+ # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+ # support. Future versions do this automatically, but an explicit c++rt0.o
+ # does not break anything, and helps significantly (at the cost of a little
+ # extra space).
+ freebsd2.2*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+ freebsd2*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_direct=yes
+ hardcode_minus_L=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+ freebsd*)
+ archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ hpux9* | hpux10* | hpux11*)
+ case "$host_os" in
+ hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;;
+ *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;;
+ esac
+ hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+ hardcode_libdir_separator=:
+ hardcode_direct=yes
+ hardcode_minus_L=yes # Not in the search PATH, but as the default
+ # location of the library.
+ export_dynamic_flag_spec='${wl}-E'
+ ;;
+
+ irix5* | irix6*)
+ if test "$with_gcc" = yes; then
+ archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+ hardcode_libdir_separator=:
+ link_all_deplibs=yes
+ ;;
+
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out
+ else
+ archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF
+ fi
+ hardcode_libdir_flag_spec='${wl}-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ openbsd*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ os2*)
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_minus_L=yes
+ allow_undefined_flag=unsupported
+ archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+ old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+ ;;
+
+ osf3*)
+ if test "$with_gcc" = yes; then
+ allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+ archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ allow_undefined_flag=' -expect_unresolved \*'
+ archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+ hardcode_libdir_separator=:
+ ;;
+
+ osf4* | osf5*) # as osf3* with the addition of -msym flag
+ if test "$with_gcc" = yes; then
+ allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+ archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ allow_undefined_flag=' -expect_unresolved \*'
+ archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+ hardcode_libdir_separator=:
+ ;;
+
+ sco3.2v5*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_shlibpath_var=no
+ runpath_var=LD_RUN_PATH
+ hardcode_runpath_var=yes
+ ;;
+
+ solaris*)
+ no_undefined_flag=' -z text'
+ # $CC -shared without GNU ld will not create a library from C++
+ # object files and a static libstdc++, better avoid it by now
+ archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_shlibpath_var=no
+ case "$host_os" in
+ solaris2.[0-5] | solaris2.[0-5].*) ;;
+ *) # Supported since Solaris 2.6 (maybe 2.5.1?)
+ whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;;
+ esac
+ link_all_deplibs=yes
+ ;;
+
+ sunos4*)
+ archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_direct=yes
+ hardcode_minus_L=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ sysv4)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ runpath_var='LD_RUN_PATH'
+ hardcode_shlibpath_var=no
+ hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+ ;;
+
+ sysv4.3*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_shlibpath_var=no
+ export_dynamic_flag_spec='-Bexport'
+ ;;
+
+ sysv5*)
+ no_undefined_flag=' -z text'
+ # $CC -shared without GNU ld will not create a library from C++
+ # object files and a static libstdc++, better avoid it by now
+ archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+ hardcode_libdir_flag_spec=
+ hardcode_shlibpath_var=no
+ runpath_var='LD_RUN_PATH'
+ ;;
+
+ uts4*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_shlibpath_var=no
+ ;;
+
+ dgux*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_shlibpath_var=no
+ ;;
+
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_shlibpath_var=no
+ runpath_var=LD_RUN_PATH
+ hardcode_runpath_var=yes
+ ld_shlibs=yes
+ fi
+ ;;
+
+ sysv4.2uw2*)
+ archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_direct=yes
+ hardcode_minus_L=no
+ hardcode_shlibpath_var=no
+ hardcode_runpath_var=yes
+ runpath_var=LD_RUN_PATH
+ ;;
+
+ unixware7*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ runpath_var='LD_RUN_PATH'
+ hardcode_shlibpath_var=no
+ ;;
+
+ *)
+ ld_shlibs=no
+ ;;
+ esac
+fi
+echo "$ac_t$ld_shlibs" 1>&6
+test "$ld_shlibs" = no && can_build_shared=no
+
+# Check hardcoding attributes.
+echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" || \
+ test -n "$runpath_var"; then
+
+ # We can hardcode non-existant directories.
+ if test "$hardcode_direct" != no &&
+ # If the only mechanism to avoid hardcoding is shlibpath_var, we
+ # have to relink, otherwise we might link with an installed library
+ # when we should be linking with a yet-to-be-installed one
+ ## test "$hardcode_shlibpath_var" != no &&
+ test "$hardcode_minus_L" != no; then
+ # Linking always hardcodes the temporary library directory.
+ hardcode_action=relink
+ else
+ # We can link without hardcoding, and we can hardcode nonexisting dirs.
+ hardcode_action=immediate
+ fi
+else
+ # We cannot hardcode anything, or else we can only hardcode existing
+ # directories.
+ hardcode_action=unsupported
+fi
+echo "$ac_t$hardcode_action" 1>&6
+
+echo $ac_n "checking whether stripping libraries is possible... $ac_c" 1>&6
+if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then
+ test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+ test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+ echo "${ac_t}yes" 1>&6
+else
+ echo "${ac_t}no" 1>&6
+fi
+
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+# PORTME Fill in your ld.so characteristics
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+
+echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6
+case "$host_os" in
+aix3*)
+ version_type=linux
+ library_names_spec='${libname}${release}.so$versuffix $libname.a'
+ shlibpath_var=LIBPATH
+
+ # AIX has no versioning support, so we append a major version to the name.
+ soname_spec='${libname}${release}.so$major'
+ ;;
+
+aix4*)
+ version_type=linux
+ # AIX has no versioning support, so currently we can not hardcode correct
+ # soname into executable. Probably we can add versioning support to
+ # collect2, so additional links can be useful in future.
+ # We preserve .a as extension for shared libraries though AIX4.2
+ # and later linker supports .so
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.a'
+ shlibpath_var=LIBPATH
+ ;;
+
+amigaos*)
+ library_names_spec='$libname.ixlibrary $libname.a'
+ # Create ${libname}_ixlibrary.a entries in /sys/libs.
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
+ ;;
+
+beos*)
+ library_names_spec='${libname}.so'
+ dynamic_linker="$host_os ld.so"
+ shlibpath_var=LIBRARY_PATH
+ lt_cv_dlopen="load_add_on"
+ lt_cv_dlopen_libs=
+ lt_cv_dlopen_self=yes
+ ;;
+
+bsdi4*)
+ version_type=linux
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+ sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+ export_dynamic_flag_spec=-rdynamic
+ # the default ld.so.conf also contains /usr/contrib/lib and
+ # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+ # libtool to hard-code these into programs
+ ;;
+
+cygwin* | mingw*)
+ version_type=windows
+ need_version=no
+ need_lib_prefix=no
+ if test "$with_gcc" = yes; then
+ library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll'
+ else
+ library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.lib'
+ fi
+ dynamic_linker='Win32 ld.exe'
+ # FIXME: first we should search . and the directory the executable is in
+ shlibpath_var=PATH
+ lt_cv_dlopen="LoadLibrary"
+ lt_cv_dlopen_libs=
+ ;;
+
+freebsd1*)
+ dynamic_linker=no
+ ;;
+
+freebsd*)
+ objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout`
+ version_type=freebsd-$objformat
+ case "$version_type" in
+ freebsd-elf*)
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
+ need_version=no
+ need_lib_prefix=no
+ ;;
+ freebsd-*)
+ library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix'
+ need_version=yes
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY_PATH
+ case "$host_os" in
+ freebsd2*)
+ shlibpath_overrides_runpath=yes
+ ;;
+ freebsd3.[01]* | freebsdelf3.[01]*)
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+ *) # from 3.2 on
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ ;;
+ esac
+ ;;
+
+gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ hardcode_into_libs=yes
+ ;;
+
+hpux9* | hpux10* | hpux11*)
+ # Give a soname corresponding to the major version so that dld.sl refuses to
+ # link against other versions.
+ dynamic_linker="$host_os dld.sl"
+ version_type=sunos
+ need_lib_prefix=no
+ need_version=no
+ shlibpath_var=SHLIB_PATH
+ shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+ library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl'
+ soname_spec='${libname}${release}.sl$major'
+ # HP-UX runs *really* slowly unless shared libraries are mode 555.
+ postinstall_cmds='chmod 555 $lib'
+ ;;
+
+irix5* | irix6*)
+ version_type=irix
+ need_lib_prefix=no
+ need_version=no
+ soname_spec='${libname}${release}.so.$major'
+ library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major ${libname}${release}.so $libname.so'
+ case "$host_os" in
+ irix5*)
+ libsuff= shlibsuff=
+ ;;
+ *)
+ case "$LD" in # libtool.m4 will add one of these switches to LD
+ *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;;
+ *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;;
+ *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;;
+ *) libsuff= shlibsuff= libmagic=never-match;;
+ esac
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+ sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+ ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
+ dynamic_linker=no
+ ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ # This implies no fast_install, which is unacceptable.
+ # Some rework will be needed to allow for fast_install
+ # before this can be enabled.
+ hardcode_into_libs=yes
+
+ if test -f /lib/ld.so.1; then
+ dynamic_linker='GNU ld.so'
+ else
+ # Only the GNU ld.so supports shared libraries on MkLinux.
+ case "$host_cpu" in
+ powerpc*) dynamic_linker=no ;;
+ *) dynamic_linker='Linux ld.so' ;;
+ esac
+ fi
+ ;;
+
+netbsd*)
+ version_type=sunos
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ dynamic_linker='NetBSD (a.out) ld.so'
+ else
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so'
+ soname_spec='${libname}${release}.so$major'
+ dynamic_linker='NetBSD ld.elf_so'
+ fi
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+openbsd*)
+ version_type=sunos
+ if test "$with_gnu_ld" = yes; then
+ need_lib_prefix=no
+ need_version=no
+ fi
+ library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+os2*)
+ libname_spec='$name'
+ need_lib_prefix=no
+ library_names_spec='$libname.dll $libname.a'
+ dynamic_linker='OS/2 ld.exe'
+ shlibpath_var=LIBPATH
+ ;;
+
+osf3* | osf4* | osf5*)
+ version_type=osf
+ need_version=no
+ soname_spec='${libname}${release}.so'
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+ sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+ ;;
+
+sco3.2v5*)
+ version_type=osf
+ soname_spec='${libname}${release}.so$major'
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+solaris*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ # ldd complains unless libraries are executable
+ postinstall_cmds='chmod +x $lib'
+ ;;
+
+sunos4*)
+ version_type=sunos
+ library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+ finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ if test "$with_gnu_ld" = yes; then
+ need_lib_prefix=no
+ fi
+ need_version=yes
+ ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ version_type=linux
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ case "$host_vendor" in
+ motorola)
+ need_lib_prefix=no
+ need_version=no
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+ ;;
+ esac
+ ;;
+
+uts4*)
+ version_type=linux
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+dgux*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+sysv4*MP*)
+ if test -d /usr/nec ;then
+ version_type=linux
+ library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so'
+ soname_spec='$libname.so.$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ fi
+ ;;
+
+*)
+ dynamic_linker=no
+ ;;
+esac
+echo "$ac_t$dynamic_linker" 1>&6
+test "$dynamic_linker" = no && can_build_shared=no
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6
+
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix. What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[BCDEGRST]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Transform the above into a raw symbol and a C symbol.
+symxfrm='\1 \2\3 \3'
+
+# Transform an extracted symbol line into a proper C declaration
+global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'"
+
+# Define system-specific variables.
+case "$host_os" in
+aix*)
+ symcode='[BCDT]'
+ ;;
+cygwin* | mingw*)
+ symcode='[ABCDGISTW]'
+ ;;
+hpux*) # Its linker distinguishes data from code symbols
+ global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^. .* \(.*\)$/extern char \1;/p'"
+ ;;
+irix*)
+ symcode='[BCDEGRST]'
+ ;;
+solaris* | sysv5*)
+ symcode='[BDT]'
+ ;;
+sysv4)
+ symcode='[DFNSTU]'
+ ;;
+esac
+
+# Handle CRLF in mingw too chain
+opt_cr=
+case "$host_os" in
+mingw*)
+ opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+ ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
+ symcode='[ABCDGISTW]'
+fi
+
+# Try without a prefix undercore, then with it.
+for ac_symprfx in "" "_"; do
+
+ # Write the raw and C identifiers.
+global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"
+
+ # Check to see that the pipe works correctly.
+ pipe_works=no
+ $rm conftest*
+ cat > conftest.c <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+ echo "$progname:1867: checking if global_symbol_pipe works" >&5
+ if { (eval echo $progname:1868: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.$objext; then
+ # Now try to grab the symbols.
+ nlist=conftest.nm
+ if { echo "$progname:1871: eval \"$NM conftest.$objext | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.$objext | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then
+
+ # Try sorting and uniquifying the output.
+ if sort "$nlist" | uniq > "$nlist"T; then
+ mv -f "$nlist"T "$nlist"
+ else
+ rm -f "$nlist"T
+ fi
+
+ # Make sure that we snagged all the symbols we need.
+ if egrep ' nm_test_var$' "$nlist" >/dev/null; then
+ if egrep ' nm_test_func$' "$nlist" >/dev/null; then
+ cat <<EOF > conftest.c
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+ # Now generate the symbol file.
+ eval "$global_symbol_to_cdecl"' < "$nlist" >> conftest.c'
+
+ cat <<EOF >> conftest.c
+#if defined (__STDC__) && __STDC__
+# define lt_ptr_t void *
+#else
+# define lt_ptr_t char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+ const char *name;
+ lt_ptr_t address;
+}
+lt_preloaded_symbols[] =
+{
+EOF
+ sed 's/^. \(.*\) \(.*\)$/ {"\2", (lt_ptr_t) \&\2},/' < "$nlist" >> conftest.c
+ cat <<\EOF >> conftest.c
+ {0, (lt_ptr_t) 0}
+};
+
+#ifdef __cplusplus
+}
+#endif
+EOF
+ # Now try linking the two files.
+ mv conftest.$objext conftstm.$objext
+ save_LIBS="$LIBS"
+ save_CFLAGS="$CFLAGS"
+ LIBS="conftstm.$objext"
+ CFLAGS="$CFLAGS$no_builtin_flag"
+ if { (eval echo $progname:1923: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+ pipe_works=yes
+ else
+ echo "$progname: failed program was:" >&5
+ cat conftest.c >&5
+ fi
+ LIBS="$save_LIBS"
+ else
+ echo "cannot find nm_test_func in $nlist" >&5
+ fi
+ else
+ echo "cannot find nm_test_var in $nlist" >&5
+ fi
+ else
+ echo "cannot run $global_symbol_pipe" >&5
+ fi
+ else
+ echo "$progname: failed program was:" >&5
+ cat conftest.c >&5
+ fi
+ $rm conftest* conftst*
+
+ # Do not use the global_symbol_pipe unless it works.
+ if test "$pipe_works" = yes; then
+ break
+ else
+ global_symbol_pipe=
+ fi
+done
+if test "$pipe_works" = yes; then
+ echo "${ac_t}ok" 1>&6
+else
+ echo "${ac_t}failed" 1>&6
+fi
+
+if test -z "$global_symbol_pipe"; then
+ global_symbol_to_cdecl=
+fi
+
+# Report the final consequences.
+echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6
+
+# Only try to build win32 dlls if AC_LIBTOOL_WIN32_DLL was used in
+# configure.in, otherwise build static only libraries.
+case "$host_os" in
+cygwin* | mingw* | os2*)
+ if test x$can_build_shared = xyes; then
+ test x$enable_win32_dll = xno && can_build_shared=no
+ echo "checking if package supports dlls... $can_build_shared" 1>&6
+ fi
+;;
+esac
+
+echo $ac_n "checking whether to build shared libraries... $ac_c" 1>&6
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case "$host_os" in
+aix3*)
+ test "$enable_shared" = yes && enable_static=no
+ if test -n "$RANLIB"; then
+ archive_cmds="$archive_cmds~\$RANLIB \$lib"
+ postinstall_cmds='$RANLIB $lib'
+ fi
+ ;;
+
+aix4*)
+ test "$enable_shared" = yes && enable_static=no
+ ;;
+esac
+
+echo "$ac_t$enable_shared" 1>&6
+
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+
+echo "checking whether to build static libraries... $enable_static" 1>&6
+
+if test "$hardcode_action" = relink || test "$hardcode_into_libs" = all; then
+ # Fast installation is not supported
+ enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+ test "$enable_shared" = no; then
+ # Fast installation is not necessary
+ enable_fast_install=needless
+fi
+
+# Check whether we must set pic_mode to default
+test -z "$pic_flag" && pic_mode=default
+# On Cygwin there's no "real" PIC flag so we must build both object types
+case "$host_os" in
+cygwin* | mingw* | os2*)
+ pic_mode=default
+ ;;
+esac
+if test $pic_mode = no && test "$deplibs_check_method" != pass_all; then
+ # non-PIC code in shared libraries is not supported
+ pic_mode=default
+fi
+
+if test "x$enable_dlopen" != xyes; then
+ enable_dlopen=unknown
+ enable_dlopen_self=unknown
+ enable_dlopen_self_static=unknown
+else
+if test "X${lt_cv_dlopen+set}" != Xset; then
+ lt_cv_dlopen=no lt_cv_dlopen_libs=
+echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6
+echo "$progname:2032: checking for dlopen in -ldl" >&5
+if test "X${ac_cv_lib_dl_dlopen+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_save_LIBS="$LIBS"
+LIBS="-ldl $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 2039 "ltconfig"
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen();
+
+int main() {
+dlopen()
+; return 0; }
+EOF
+if { (eval echo $progname:2052: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_lib_dl_dlopen=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_lib_dl_dlopen=no
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if test "X$ac_cv_lib_dl_dlopen" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dlopen""... $ac_c" 1>&6
+echo "$progname:2071: checking for dlopen" >&5
+if test "X${ac_cv_func_dlopen+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2076 "ltconfig"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char dlopen(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen();
+
+int main() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_dlopen) || defined (__stub___dlopen)
+choke me
+#else
+dlopen();
+#endif
+
+; return 0; }
+EOF
+if { (eval echo $progname:2101: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_func_dlopen=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_func_dlopen=no
+fi
+rm -f conftest*
+fi
+if test "X$ac_cv_func_dlopen" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="dlopen"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dld_link in -ldld""... $ac_c" 1>&6
+echo "$progname:2118: checking for dld_link in -ldld" >&5
+if test "X${ac_cv_lib_dld_dld_link+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_save_LIBS="$LIBS"
+LIBS="-ldld $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 2125 "ltconfig"
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dld_link();
+
+int main() {
+dld_link()
+; return 0; }
+EOF
+if { (eval echo $progname:2138: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_lib_dld_dld_link=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_lib_dld_dld_link=no
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if test "X$ac_cv_lib_dld_dld_link" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for shl_load""... $ac_c" 1>&6
+echo "$progname:2157: checking for shl_load" >&5
+if test "X${ac_cv_func_shl_load+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2162 "ltconfig"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char shl_load(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load();
+
+int main() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_shl_load) || defined (__stub___shl_load)
+choke me
+#else
+shl_load();
+#endif
+
+; return 0; }
+EOF
+if { (eval echo $progname:2187: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_func_shl_load=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_func_shl_load=no
+fi
+rm -f conftest*
+fi
+
+if test "X$ac_cv_func_shl_load" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="shl_load"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for shl_load in -ldld""... $ac_c" 1>&6
+echo "$progname:2205: checking for shl_load in -ldld" >&5
+if test "X${ac_cv_lib_dld_shl_load+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_save_LIBS="$LIBS"
+LIBS="-ldld $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 2212 "ltconfig"
+#include "confdefs.h"
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load();
+
+int main() {
+shl_load()
+; return 0; }
+EOF
+if { (eval echo $progname:2226: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_lib_dld_shl_load=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_lib_dld_shl_load=no
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if test "X$ac_cv_lib_dld_shl_load" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+fi
+
+ if test "x$lt_cv_dlopen" != xno; then
+ enable_dlopen=yes
+ fi
+
+ case "$lt_cv_dlopen" in
+ dlopen)
+for ac_hdr in dlfcn.h; do
+ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'`
+echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
+echo "$progname:2269: checking for $ac_hdr" >&5
+if eval "test \"`echo 'X$''{'ac_cv_header_$ac_safe'+set}'`\" = Xset"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2274 "ltconfig"
+#include <$ac_hdr>
+int fnord = 0;
+int main () { }
+EOF
+ac_try="$ac_compile >/dev/null 2>conftest.out"
+{ (eval echo $progname:2280: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=yes"
+else
+ echo "$ac_err" >&5
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+else
+ echo "$ac_t""no" 1>&6
+fi
+done
+
+ if test "x$ac_cv_header_dlfcn_h" = xyes; then
+ CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+ fi
+ eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+ LIBS="$lt_cv_dlopen_libs $LIBS"
+
+ echo $ac_n "checking whether a program can dlopen itself""... $ac_c" 1>&6
+echo "$progname:2308: checking whether a program can dlopen itself" >&5
+if test "X${lt_cv_dlopen_self+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ lt_cv_dlopen_self=cross
+ else
+ cat > conftest.c <<EOF
+#line 2316 "ltconfig"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LTDL_GLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LTDL_GLOBAL DL_GLOBAL
+# else
+# define LTDL_GLOBAL 0
+# endif
+#endif
+
+/* We may have to define LTDL_LAZY_OR_NOW in the command line if we
+ find out it does not work in some platform. */
+#ifndef LTDL_LAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LTDL_LAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LTDL_LAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LTDL_LAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LTDL_LAZY_OR_NOW DL_NOW
+# else
+# define LTDL_LAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+fnord() { int i=42;}
+main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW);
+ if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord");
+ if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); }
+
+EOF
+if { (eval echo $progname:2362: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+then
+ lt_cv_dlopen_self=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ lt_cv_dlopen_self=no
+fi
+rm -fr conftest*
+fi
+
+fi
+
+echo "$ac_t""$lt_cv_dlopen_self" 1>&6
+
+ if test "$lt_cv_dlopen_self" = yes; then
+ LDFLAGS="$LDFLAGS $link_static_flag"
+ echo $ac_n "checking whether a statically linked program can dlopen itself""... $ac_c" 1>&6
+echo "$progname:2381: checking whether a statically linked program can dlopen itself" >&5
+if test "X${lt_cv_dlopen_self_static+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ lt_cv_dlopen_self_static=cross
+ else
+ cat > conftest.c <<EOF
+#line 2389 "ltconfig"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LTDL_GLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LTDL_GLOBAL DL_GLOBAL
+# else
+# define LTDL_GLOBAL 0
+# endif
+#endif
+
+/* We may have to define LTDL_LAZY_OR_NOW in the command line if we
+ find out it does not work in some platform. */
+#ifndef LTDL_LAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LTDL_LAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LTDL_LAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LTDL_LAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LTDL_LAZY_OR_NOW DL_NOW
+# else
+# define LTDL_LAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+fnord() { int i=42;}
+main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW);
+ if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord");
+ if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); }
+
+EOF
+if { (eval echo $progname:2435: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+then
+ lt_cv_dlopen_self_static=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ lt_cv_dlopen_self_static=no
+fi
+rm -fr conftest*
+fi
+
+fi
+
+echo "$ac_t""$lt_cv_dlopen_self_static" 1>&6
+fi
+ ;;
+ esac
+
+ case "$lt_cv_dlopen_self" in
+ yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+ *) enable_dlopen_self=unknown ;;
+ esac
+
+ case "$lt_cv_dlopen_self_static" in
+ yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+ *) enable_dlopen_self_static=unknown ;;
+ esac
+fi
+
+# Copy echo and quote the copy, instead of the original, because it is
+# used later.
+ltecho="$echo"
+if test "X$ltecho" = "X$CONFIG_SHELL $0 --fallback-echo"; then
+ ltecho="$CONFIG_SHELL \$0 --fallback-echo"
+fi
+LTSHELL="$SHELL"
+
+LTCONFIG_VERSION="$VERSION"
+
+# Only quote variables if we're using ltmain.sh.
+case "$ltmain" in
+*.sh)
+ # Now quote all the things that may contain metacharacters.
+ for var in ltecho old_AR old_CC old_CFLAGS old_CPPFLAGS \
+ old_MAGIC old_LD old_LDFLAGS old_LIBS \
+ old_LN_S old_NM old_RANLIB old_STRIP \
+ old_AS old_DLLTOOL old_OBJDUMP \
+ old_OBJEXT old_EXEEXT old_reload_flag \
+ old_deplibs_check_method old_file_magic_cmd \
+ AR CC LD LN_S NM LTSHELL LTCONFIG_VERSION \
+ reload_flag reload_cmds wl \
+ pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \
+ thread_safe_flag_spec whole_archive_flag_spec libname_spec \
+ library_names_spec soname_spec \
+ RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \
+ old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \
+ postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \
+ old_striplib striplib file_magic_cmd export_symbols_cmds \
+ deplibs_check_method allow_undefined_flag no_undefined_flag \
+ finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \
+ hardcode_libdir_flag_spec hardcode_libdir_separator \
+ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \
+ compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do
+
+ case "$var" in
+ reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \
+ old_postinstall_cmds | old_postuninstall_cmds | \
+ export_symbols_cmds | archive_cmds | archive_expsym_cmds | \
+ extract_expsyms_cmds | old_archive_from_expsyms_cmds | \
+ postinstall_cmds | postuninstall_cmds | \
+ finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec)
+ # Double-quote double-evaled strings.
+ eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ### testsuite: skip nested quoting test
+ ;;
+ *)
+ eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ### testsuite: skip nested quoting test
+ ;;
+ esac
+ done
+
+ case "$ltecho" in
+ *'\$0 --fallback-echo"')
+ ltecho=`$echo "X$ltecho" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'`
+ ;;
+ esac
+
+ trap "$rm \"$ofile\"; exit 1" 1 2 15
+ echo "creating $ofile"
+ $rm "$ofile"
+ cat <<EOF > "$ofile"
+#! $SHELL
+
+# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh.
+#
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="sed -e s/^X//"
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+### BEGIN LIBTOOL CONFIG
+EOF
+ cfgfile="$ofile"
+ ;;
+
+*)
+ # Double-quote the variables that need it (for aesthetics).
+ for var in old_AR old_CC old_CFLAGS old_CPPFLAGS \
+ old_MAGIC old_LD old_LDFLAGS old_LIBS \
+ old_LN_S old_NM old_RANLIB old_STRIP \
+ old_AS old_DLLTOOL old_OBJDUMP \
+ old_OBJEXT old_EXEEXT old_reload_flag \
+ old_deplibs_check_method old_file_magic_cmd; do
+ eval "$var=\\\"\$var\\\""
+ done
+
+ # Just create a config file.
+ cfgfile="$ofile.cfg"
+ trap "$rm \"$cfgfile\"; exit 1" 1 2 15
+ echo "creating $cfgfile"
+ $rm "$cfgfile"
+ cat <<EOF > "$cfgfile"
+# `$echo "$cfgfile" | sed 's%^.*/%%'` - Libtool configuration file.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+EOF
+ ;;
+esac
+
+cat <<EOF >> "$cfgfile"
+# Libtool was configured as follows, on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+#
+# AR=$old_AR CC=$old_CC CFLAGS=$old_CFLAGS CPPFLAGS=$old_CPPFLAGS \\
+# MAGIC=$old_MAGIC LD=$old_LD LDFLAGS=$old_LDFLAGS LIBS=$old_LIBS \\
+# LN_S=$old_LN_S NM=$old_NM RANLIB=$old_RANLIB STRIP=$old_STRIP \\
+# AS=$old_AS DLLTOOL=$old_DLLTOOL OBJDUMP=$old_OBJDUMP \\
+# objext=$old_OBJEXT exeext=$old_EXEEXT reload_flag=$old_reload_flag \\
+# deplibs_check_method=$old_deplibs_check_method file_magic_cmd=$old_file_magic_cmd \\
+# $0$ltconfig_args
+#
+# Compiler and other test output produced by $progname, useful for
+# debugging $progname, is in ./config.log if it exists.
+# The version of $progname that generated this script.
+LTCONFIG_VERSION=$LTCONFIG_VERSION
+
+# Shell to use when invoking shell scripts.
+SHELL=$LTSHELL
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# The host system.
+host_alias=$host_alias
+host=$host
+
+# An echo program that does not interpret backslashes.
+echo=$ltecho
+
+# The archiver.
+AR=$AR
+
+# The default C compiler.
+CC=$CC
+
+# The linker used to build libraries.
+LD=$LD
+
+# Whether we need hard or soft links.
+LN_S=$LN_S
+
+# A BSD-compatible nm program.
+NM=$NM
+
+# A symbol stripping program
+STRIP=$STRIP
+
+# Used to examine libraries when file_magic_cmd begins "file"
+MAGIC=$MAGIC
+
+# Used on cygwin: DLL creation program.
+DLLTOOL="$DLLTOOL"
+
+# Used on cygwin: object dumper.
+OBJDUMP="$OBJDUMP"
+
+# Used on cygwin: assembler.
+AS="$AS"
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# How to create reloadable object files.
+reload_flag=$reload_flag
+reload_cmds=$reload_cmds
+
+# How to pass a linker flag through the compiler.
+wl=$wl
+
+# Object file suffix (normally "o").
+objext="$objext"
+
+# Old archive suffix (normally "a").
+libext="$libext"
+
+# Executable file suffix (normally "").
+exeext="$exeext"
+
+# Additional compiler flags for building library objects.
+pic_flag=$pic_flag
+pic_mode=$pic_mode
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$compiler_c_o
+
+# Can we write directly to a .lo ?
+compiler_o_lo=$compiler_o_lo
+
+# Must we lock files when doing compilation ?
+need_locks=$need_locks
+
+# Do we need the lib prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$link_static_flag
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$no_builtin_flag
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$whole_archive_flag_spec
+
+# Compiler flag to generate thread-safe objects.
+thread_safe_flag_spec=$thread_safe_flag_spec
+
+# Library versioning type.
+version_type=$version_type
+
+# Format of library name prefix.
+libname_spec=$libname_spec
+
+# List of archive names. First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME.
+library_names_spec=$library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$soname_spec
+
+# Commands used to build and install an old-style archive.
+RANLIB=$RANLIB
+old_archive_cmds=$old_archive_cmds
+old_postinstall_cmds=$old_postinstall_cmds
+old_postuninstall_cmds=$old_postuninstall_cmds
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$old_archive_from_expsyms_cmds
+
+# Commands used to build and install a shared archive.
+archive_cmds=$archive_cmds
+archive_expsym_cmds=$archive_expsym_cmds
+postinstall_cmds=$postinstall_cmds
+postuninstall_cmds=$postuninstall_cmds
+
+# Commands to strip libraries.
+old_striplib=$old_striplib
+striplib=$striplib
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$deplibs_check_method
+
+# Command to use when deplibs_check_method == file_magic.
+file_magic_cmd=$file_magic_cmd
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$allow_undefined_flag
+
+# Flag that forces no undefined symbols.
+no_undefined_flag=$no_undefined_flag
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$finish_cmds
+
+# Same as above, but a single script fragment to be evaled but not shown.
+finish_eval=$finish_eval
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$global_symbol_pipe
+
+# Transform the output of nm in a proper C declaration
+global_symbol_to_cdecl=$global_symbol_to_cdecl
+
+# This is the shared library runtime path variable.
+runpath_var=$runpath_var
+
+# This is the shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec=$hardcode_libdir_flag_spec
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator=$hardcode_libdir_separator
+
+# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
+# the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Compile-time system search path for libraries
+sys_lib_search_path_spec=$sys_lib_search_path_spec
+
+# Run-time system search path for libraries
+sys_lib_dlsearch_path_spec=$sys_lib_dlsearch_path_spec
+
+# Fix the shell variable \$srcfile for the compiler.
+fix_srcfile_path="$fix_srcfile_path"
+
+# Set to yes if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$export_symbols_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$extract_expsyms_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$include_expsyms
+
+EOF
+
+case "$ltmain" in
+*.sh)
+ echo '### END LIBTOOL CONFIG' >> "$ofile"
+ echo >> "$ofile"
+ case "$host_os" in
+ aix3*)
+ cat <<\EOF >> "$ofile"
+
+# AIX sometimes has problems with the GCC collect2 program. For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+ COLLECT_NAMES=
+ export COLLECT_NAMES
+fi
+EOF
+ ;;
+ esac
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ cat <<'EOF' >> "$ofile"
+ # This is a source program that is used to create dlls on Windows
+ # Don't remove nor modify the starting and closing comments
+# /* ltdll.c starts here */
+# #define WIN32_LEAN_AND_MEAN
+# #include <windows.h>
+# #undef WIN32_LEAN_AND_MEAN
+# #include <stdio.h>
+#
+# #ifndef __CYGWIN__
+# # ifdef __CYGWIN32__
+# # define __CYGWIN__ __CYGWIN32__
+# # endif
+# #endif
+#
+# #ifdef __cplusplus
+# extern "C" {
+# #endif
+# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
+# #ifdef __cplusplus
+# }
+# #endif
+#
+# #ifdef __CYGWIN__
+# #include <cygwin/cygwin_dll.h>
+# DECLARE_CYGWIN_DLL( DllMain );
+# #endif
+# HINSTANCE __hDllInstance_base;
+#
+# BOOL APIENTRY
+# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
+# {
+# __hDllInstance_base = hInst;
+# return TRUE;
+# }
+# /* ltdll.c ends here */
+ # This is a source program that is used to create import libraries
+ # on Windows for dlls which lack them. Don't remove nor modify the
+ # starting and closing comments
+# /* impgen.c starts here */
+# /* Copyright (C) 1999-2000 Free Software Foundation, Inc.
+#
+# This file is part of GNU libtool.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# */
+#
+# #include <stdio.h> /* for printf() */
+# #include <unistd.h> /* for open(), lseek(), read() */
+# #include <fcntl.h> /* for O_RDONLY, O_BINARY */
+# #include <string.h> /* for strdup() */
+#
+# /* O_BINARY isn't required (or even defined sometimes) under Unix */
+# #ifndef O_BINARY
+# #define O_BINARY 0
+# #endif
+#
+# static unsigned int
+# pe_get16 (fd, offset)
+# int fd;
+# int offset;
+# {
+# unsigned char b[2];
+# lseek (fd, offset, SEEK_SET);
+# read (fd, b, 2);
+# return b[0] + (b[1]<<8);
+# }
+#
+# static unsigned int
+# pe_get32 (fd, offset)
+# int fd;
+# int offset;
+# {
+# unsigned char b[4];
+# lseek (fd, offset, SEEK_SET);
+# read (fd, b, 4);
+# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# static unsigned int
+# pe_as32 (ptr)
+# void *ptr;
+# {
+# unsigned char *b = ptr;
+# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# int
+# main (argc, argv)
+# int argc;
+# char *argv[];
+# {
+# int dll;
+# unsigned long pe_header_offset, opthdr_ofs, num_entries, i;
+# unsigned long export_rva, export_size, nsections, secptr, expptr;
+# unsigned long name_rvas, nexp;
+# unsigned char *expdata, *erva;
+# char *filename, *dll_name;
+#
+# filename = argv[1];
+#
+# dll = open(filename, O_RDONLY|O_BINARY);
+# if (!dll)
+# return 1;
+#
+# dll_name = filename;
+#
+# for (i=0; filename[i]; i++)
+# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':')
+# dll_name = filename + i +1;
+#
+# pe_header_offset = pe_get32 (dll, 0x3c);
+# opthdr_ofs = pe_header_offset + 4 + 20;
+# num_entries = pe_get32 (dll, opthdr_ofs + 92);
+#
+# if (num_entries < 1) /* no exports */
+# return 1;
+#
+# export_rva = pe_get32 (dll, opthdr_ofs + 96);
+# export_size = pe_get32 (dll, opthdr_ofs + 100);
+# nsections = pe_get16 (dll, pe_header_offset + 4 +2);
+# secptr = (pe_header_offset + 4 + 20 +
+# pe_get16 (dll, pe_header_offset + 4 + 16));
+#
+# expptr = 0;
+# for (i = 0; i < nsections; i++)
+# {
+# char sname[8];
+# unsigned long secptr1 = secptr + 40 * i;
+# unsigned long vaddr = pe_get32 (dll, secptr1 + 12);
+# unsigned long vsize = pe_get32 (dll, secptr1 + 16);
+# unsigned long fptr = pe_get32 (dll, secptr1 + 20);
+# lseek(dll, secptr1, SEEK_SET);
+# read(dll, sname, 8);
+# if (vaddr <= export_rva && vaddr+vsize > export_rva)
+# {
+# expptr = fptr + (export_rva - vaddr);
+# if (export_rva + export_size > vaddr + vsize)
+# export_size = vsize - (export_rva - vaddr);
+# break;
+# }
+# }
+#
+# expdata = (unsigned char*)malloc(export_size);
+# lseek (dll, expptr, SEEK_SET);
+# read (dll, expdata, export_size);
+# erva = expdata - export_rva;
+#
+# nexp = pe_as32 (expdata+24);
+# name_rvas = pe_as32 (expdata+32);
+#
+# printf ("EXPORTS\n");
+# for (i = 0; i<nexp; i++)
+# {
+# unsigned long name_rva = pe_as32 (erva+name_rvas+i*4);
+# printf ("\t%s @ %ld ;\n", erva+name_rva, 1+ i);
+# }
+#
+# return 0;
+# }
+# /* impgen.c ends here */
+
+EOF
+ ;;
+ esac
+
+
+ # Append the ltmain.sh script.
+ sed '$q' "$ltmain" >> "$ofile" || (rm -f "$ofile"; exit 1)
+ # We use sed instead of cat because bash on DJGPP gets confused if
+ # if finds mixed CR/LF and LF-only lines. Since sed operates in
+ # text mode, it properly converts lines to CR/LF. This bash problem
+ # is reportedly fixed, but why not run on old versions too?
+
+ chmod +x "$ofile"
+ ;;
+
+*)
+ # Compile the libtool program.
+ echo "FIXME: would compile $ltmain"
+ ;;
+esac
+
+test -n "$cache_file" || exit 0
+
+# AC_CACHE_SAVE
+trap '' 1 2 15
+cat > confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote substitution
+ # turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ -e "s/'/'\\\\''/g" \
+ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+ ;;
+ esac >> confcache
+if cmp -s $cache_file confcache; then
+ :
+else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache > $cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+exit 0
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
diff --git a/rts/gmp/ltmain.sh b/rts/gmp/ltmain.sh
new file mode 100644
index 0000000000..d81d89f878
--- /dev/null
+++ b/rts/gmp/ltmain.sh
@@ -0,0 +1,4692 @@
+# ltmain.sh - Provide generalized library-building support services.
+# NOTE: Changing this file will not affect anything until you rerun ltconfig.
+#
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Check that we have a working $echo.
+if test "X$1" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+elif test "X$1" = X--fallback-echo; then
+ # Avoid inline document here, it may be left over
+ :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+ # Yippee, $echo works!
+ :
+else
+ # Restart under the correct shell, and then maybe $echo will work.
+ exec $SHELL "$0" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+ # used as fallback echo
+ shift
+ cat <<EOF
+$*
+EOF
+ exit 0
+fi
+
+# The name of this program.
+progname=`$echo "$0" | sed 's%^.*/%%'`
+modename="$progname"
+
+# Constants.
+PROGRAM=ltmain.sh
+PACKAGE=libtool
+VERSION=1.3c
+TIMESTAMP=" (1.696 2000/03/14 20:22:42)"
+
+default_mode=
+help="Try \`$progname --help' for more information."
+magic="%%%MAGIC variable%%%"
+mkdir="mkdir"
+mv="mv -f"
+rm="rm -f"
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e 1s/^X//'
+sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g'
+SP2NL='tr \040 \012'
+NL2SP='tr \015\012 \040\040'
+
+# NLS nuisances.
+# Only set LANG and LC_ALL to C if already set.
+# These must not be set unconditionally because not all systems understand
+# e.g. LANG=C (notably SCO).
+# We save the old values to restore during execute mode.
+if test "${LC_ALL+set}" = set; then
+ save_LC_ALL="$LC_ALL"; LC_ALL=C; export LC_ALL
+fi
+if test "${LANG+set}" = set; then
+ save_LANG="$LANG"; LANG=C; export LANG
+fi
+
+if test "$LTCONFIG_VERSION" != "$VERSION"; then
+ echo "$modename: ltconfig version \`$LTCONFIG_VERSION' does not match $PROGRAM version \`$VERSION'" 1>&2
+ echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit 1
+fi
+
+if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
+ echo "$modename: not configured to build any kind of library" 1>&2
+ echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit 1
+fi
+
+# Global variables.
+mode=$default_mode
+nonopt=
+prev=
+prevopt=
+run=
+show="$echo"
+show_help=
+execute_dlfiles=
+lo2o="s/\\.lo\$/.${objext}/"
+o2lo="s/\\.${objext}\$/.lo/"
+
+# Parse our command line options once, thoroughly.
+while test $# -gt 0
+do
+ arg="$1"
+ shift
+
+ case "$arg" in
+ -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) optarg= ;;
+ esac
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ case "$prev" in
+ execute_dlfiles)
+ eval "$prev=\"\$$prev \$arg\""
+ ;;
+ *)
+ eval "$prev=\$arg"
+ ;;
+ esac
+
+ prev=
+ prevopt=
+ continue
+ fi
+
+ # Have we seen a non-optional argument yet?
+ case "$arg" in
+ --help)
+ show_help=yes
+ ;;
+
+ --version)
+ echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"
+ exit 0
+ ;;
+
+ --config)
+ sed -e '1,/^### BEGIN LIBTOOL CONFIG/d' -e '/^### END LIBTOOL CONFIG/,$d' $0
+ exit 0
+ ;;
+
+ --debug)
+ echo "$progname: enabling shell trace mode"
+ set -x
+ ;;
+
+ --dry-run | -n)
+ run=:
+ ;;
+
+ --features)
+ echo "host: $host"
+ if test "$build_libtool_libs" = yes; then
+ echo "enable shared libraries"
+ else
+ echo "disable shared libraries"
+ fi
+ if test "$build_old_libs" = yes; then
+ echo "enable static libraries"
+ else
+ echo "disable static libraries"
+ fi
+ exit 0
+ ;;
+
+ --finish) mode="finish" ;;
+
+ --mode) prevopt="--mode" prev=mode ;;
+ --mode=*) mode="$optarg" ;;
+
+ --quiet | --silent)
+ show=:
+ ;;
+
+ -dlopen)
+ prevopt="-dlopen"
+ prev=execute_dlfiles
+ ;;
+
+ -*)
+ $echo "$modename: unrecognized option \`$arg'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+
+ *)
+ nonopt="$arg"
+ break
+ ;;
+ esac
+done
+
+if test -n "$prevopt"; then
+ $echo "$modename: option \`$prevopt' requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+fi
+
+if test -z "$show_help"; then
+
+ # Infer the operation mode.
+ if test -z "$mode"; then
+ case "$nonopt" in
+ *cc | *++ | gcc* | *-gcc*)
+ mode=link
+ for arg
+ do
+ case "$arg" in
+ -c)
+ mode=compile
+ break
+ ;;
+ esac
+ done
+ ;;
+ *db | *dbx | *strace | *truss)
+ mode=execute
+ ;;
+ *install*|cp|mv)
+ mode=install
+ ;;
+ *rm)
+ mode=uninstall
+ ;;
+ *)
+ # If we have no mode, but dlfiles were specified, then do execute mode.
+ test -n "$execute_dlfiles" && mode=execute
+
+ # Just use the default operation mode.
+ if test -z "$mode"; then
+ if test -n "$nonopt"; then
+ $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2
+ else
+ $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2
+ fi
+ fi
+ ;;
+ esac
+ fi
+
+ # Only execute mode is allowed to have -dlopen flags.
+ if test -n "$execute_dlfiles" && test "$mode" != execute; then
+ $echo "$modename: unrecognized option \`-dlopen'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Change the help message to a mode-specific one.
+ generic_help="$help"
+ help="Try \`$modename --help --mode=$mode' for more information."
+
+ # These modes are in order of execution frequency so that they run quickly.
+ case "$mode" in
+ # libtool compile mode
+ compile)
+ modename="$modename: compile"
+ # Get the compilation command and the source file.
+ base_compile=
+ prev=
+ lastarg=
+ srcfile="$nonopt"
+ suppress_output=
+
+ user_target=no
+ for arg
+ do
+ case "$prev" in
+ "") ;;
+ xcompiler)
+ # Aesthetically quote the previous argument.
+ prev=
+ lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+
+ case "$arg" in
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+
+ # Add the previous argument to base_compile.
+ if test -z "$base_compile"; then
+ base_compile="$lastarg"
+ else
+ base_compile="$base_compile $lastarg"
+ fi
+ continue
+ ;;
+ esac
+
+ # Accept any command-line options.
+ case "$arg" in
+ -o)
+ if test "$user_target" != "no"; then
+ $echo "$modename: you cannot specify \`-o' more than once" 1>&2
+ exit 1
+ fi
+ user_target=next
+ ;;
+
+ -static)
+ build_old_libs=yes
+ continue
+ ;;
+
+ -Xcompiler)
+ prev=xcompiler
+ continue
+ ;;
+
+ -Wc,*)
+ args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"`
+ lastarg=
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=','
+ for arg in $args; do
+ IFS="$save_ifs"
+
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ lastarg="$lastarg $arg"
+ done
+ IFS="$save_ifs"
+ lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"`
+
+ # Add the arguments to base_compile.
+ if test -z "$base_compile"; then
+ base_compile="$lastarg"
+ else
+ base_compile="$base_compile $lastarg"
+ fi
+ continue
+ ;;
+ esac
+
+ case "$user_target" in
+ next)
+ # The next one is the -o target name
+ user_target=yes
+ continue
+ ;;
+ yes)
+ # We got the output file
+ user_target=set
+ libobj="$arg"
+ continue
+ ;;
+ esac
+
+ # Accept the current argument as the source file.
+ lastarg="$srcfile"
+ srcfile="$arg"
+
+ # Aesthetically quote the previous argument.
+
+ # Backslashify any backslashes, double quotes, and dollar signs.
+ # These are the only characters that are still specially
+ # interpreted inside of double-quoted scrings.
+ lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"`
+
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ case "$lastarg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ lastarg="\"$lastarg\""
+ ;;
+ esac
+
+ # Add the previous argument to base_compile.
+ if test -z "$base_compile"; then
+ base_compile="$lastarg"
+ else
+ base_compile="$base_compile $lastarg"
+ fi
+ done
+
+ case "$user_target" in
+ set)
+ ;;
+ no)
+ # Get the name of the library object.
+ libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'`
+ ;;
+ *)
+ $echo "$modename: you must specify a target with \`-o'" 1>&2
+ exit 1
+ ;;
+ esac
+
+ # Recognize several different file suffixes.
+ # If the user specifies -o file.o, it is replaced with file.lo
+ xform='[cCFSfmso]'
+ case "$libobj" in
+ *.ada) xform=ada ;;
+ *.adb) xform=adb ;;
+ *.ads) xform=ads ;;
+ *.asm) xform=asm ;;
+ *.c++) xform=c++ ;;
+ *.cc) xform=cc ;;
+ *.cpp) xform=cpp ;;
+ *.cxx) xform=cxx ;;
+ *.f90) xform=f90 ;;
+ *.for) xform=for ;;
+ esac
+
+ libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"`
+
+ case "$libobj" in
+ *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;;
+ *)
+ $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2
+ exit 1
+ ;;
+ esac
+
+ if test -z "$base_compile"; then
+ $echo "$modename: you must specify a compilation command" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Delete any leftover library objects.
+ if test "$build_old_libs" = yes; then
+ removelist="$obj $libobj"
+ else
+ removelist="$libobj"
+ fi
+
+ $run $rm $removelist
+ trap "$run $rm $removelist; exit 1" 1 2 15
+
+ # Calculate the filename of the output object if compiler does
+ # not support -o with -c
+ if test "$compiler_c_o" = no; then
+ output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\..*$%%'`.${objext}
+ lockfile="$output_obj.lock"
+ removelist="$removelist $output_obj $lockfile"
+ trap "$run $rm $removelist; exit 1" 1 2 15
+ else
+ need_locks=no
+ lockfile=
+ fi
+
+ # Lock this critical section if it is needed
+ # We use this script file to make the link, it avoids creating a new file
+ if test "$need_locks" = yes; then
+ until ln "$0" "$lockfile" 2>/dev/null; do
+ $show "Waiting for $lockfile to be removed"
+ sleep 2
+ done
+ elif test "$need_locks" = warn; then
+ if test -f "$lockfile"; then
+ echo "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit 1
+ fi
+ echo $srcfile > "$lockfile"
+ fi
+
+ if test -n "$fix_srcfile_path"; then
+ eval srcfile=\"$fix_srcfile_path\"
+ fi
+
+ # Only build a PIC object if we are building libtool libraries.
+ if test "$build_libtool_libs" = yes; then
+ # Without this assignment, base_compile gets emptied.
+ fbsd_hideous_sh_bug=$base_compile
+
+ if test "$pic_mode" != no; then
+ # All platforms use -DPIC, to notify preprocessed assembler code.
+ command="$base_compile $srcfile $pic_flag -DPIC"
+ else
+ # Don't build PIC code
+ command="$base_compile $srcfile"
+ fi
+ if test "$build_old_libs" = yes; then
+ lo_libobj="$libobj"
+ dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$dir" = "X$libobj"; then
+ dir="$objdir"
+ else
+ dir="$dir/$objdir"
+ fi
+ libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'`
+
+ if test -d "$dir"; then
+ $show "$rm $libobj"
+ $run $rm $libobj
+ else
+ $show "$mkdir $dir"
+ $run $mkdir $dir
+ status=$?
+ if test $status -ne 0 && test ! -d $dir; then
+ exit $status
+ fi
+ fi
+ fi
+ if test "$compiler_o_lo" = yes; then
+ output_obj="$libobj"
+ command="$command -o $output_obj"
+ elif test "$compiler_c_o" = yes; then
+ output_obj="$obj"
+ command="$command -o $output_obj"
+ fi
+
+ $run $rm "$output_obj"
+ $show "$command"
+ if $run eval "$command"; then :
+ else
+ test -n "$output_obj" && $run $rm $removelist
+ exit 1
+ fi
+
+ if test "$need_locks" = warn &&
+ test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then
+ echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit 1
+ fi
+
+ # Just move the object if needed, then go on to compile the next one
+ if test x"$output_obj" != x"$libobj"; then
+ $show "$mv $output_obj $libobj"
+ if $run $mv $output_obj $libobj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # If we have no pic_flag, then copy the object into place and finish.
+ if (test -z "$pic_flag" || test "$pic_mode" != default) &&
+ test "$build_old_libs" = yes; then
+ # Rename the .lo from within objdir to obj
+ if test -f $obj; then
+ $show $rm $obj
+ $run $rm $obj
+ fi
+
+ $show "$mv $libobj $obj"
+ if $run $mv $libobj $obj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+
+ xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$obj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"`
+ libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"`
+ # Now arrange that obj and lo_libobj become the same file
+ $show "(cd $xdir && $LN_S $baseobj $libobj)"
+ if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then
+ exit 0
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # Allow error messages only from the first compilation.
+ suppress_output=' >/dev/null 2>&1'
+ fi
+
+ # Only build a position-dependent object if we build old libraries.
+ if test "$build_old_libs" = yes; then
+ if test "$pic_mode" != yes; then
+ # Don't build PIC code
+ command="$base_compile $srcfile"
+ else
+ # All platforms use -DPIC, to notify preprocessed assembler code.
+ command="$base_compile $srcfile $pic_flag -DPIC"
+ fi
+ if test "$compiler_c_o" = yes; then
+ command="$command -o $obj"
+ output_obj="$obj"
+ fi
+
+ # Suppress compiler output if we already did a PIC compilation.
+ command="$command$suppress_output"
+ $run $rm "$output_obj"
+ $show "$command"
+ if $run eval "$command"; then :
+ else
+ $run $rm $removelist
+ exit 1
+ fi
+
+ if test "$need_locks" = warn &&
+ test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then
+ echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit 1
+ fi
+
+ # Just move the object if needed
+ if test x"$output_obj" != x"$obj"; then
+ $show "$mv $output_obj $obj"
+ if $run $mv $output_obj $obj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # Create an invalid libtool object if no PIC, so that we do not
+ # accidentally link it into a program.
+ if test "$build_libtool_libs" != yes; then
+ $show "echo timestamp > $libobj"
+ $run eval "echo timestamp > \$libobj" || exit $?
+ else
+ # Move the .lo from within objdir
+ $show "$mv $libobj $lo_libobj"
+ if $run $mv $libobj $lo_libobj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+ fi
+
+ # Unlock the critical section if it was locked
+ if test "$need_locks" != no; then
+ $rm "$lockfile"
+ fi
+
+ exit 0
+ ;;
+
+ # libtool link mode
+ link | relink)
+ modename="$modename: link"
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ # It is impossible to link a dll without this setting, and
+ # we shouldn't force the makefile maintainer to figure out
+ # which system we are compiling for in order to pass an extra
+ # flag for every libtool invokation.
+ # allow_undefined=no
+
+ # FIXME: Unfortunately, there are problems with the above when trying
+ # to make a dll which has undefined symbols, in which case not
+ # even a static library is built. For now, we need to specify
+ # -no-undefined on the libtool link line when we can be certain
+ # that all symbols are satisfied, otherwise we get a static library.
+ allow_undefined=yes
+ ;;
+ *)
+ allow_undefined=yes
+ ;;
+ esac
+ libtool_args="$nonopt"
+ compile_command="$nonopt"
+ finalize_command="$nonopt"
+
+ compile_rpath=
+ finalize_rpath=
+ compile_shlibpath=
+ finalize_shlibpath=
+ convenience=
+ old_convenience=
+ deplibs=
+ old_deplibs=
+ compiler_flags=
+ linker_flags=
+ dllsearchpath=
+ lib_search_path=`pwd`
+
+ avoid_version=no
+ dlfiles=
+ dlprefiles=
+ dlself=no
+ export_dynamic=no
+ export_symbols=
+ export_symbols_regex=
+ generated=
+ libobjs=
+ ltlibs=
+ module=no
+ no_install=no
+ objs=
+ prefer_static_libs=no
+ preload=no
+ prev=
+ prevarg=
+ release=
+ rpath=
+ xrpath=
+ perm_rpath=
+ temp_rpath=
+ thread_safe=no
+ vinfo=
+
+ # We need to know -static, to get the right output filenames.
+ for arg
+ do
+ case "$arg" in
+ -all-static | -static)
+ if test "X$arg" = "X-all-static"; then
+ if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
+ $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2
+ fi
+ if test -n "$link_static_flag"; then
+ dlopen_self=$dlopen_self_static
+ fi
+ else
+ if test -z "$pic_flag" && test -n "$link_static_flag"; then
+ dlopen_self=$dlopen_self_static
+ fi
+ fi
+ build_libtool_libs=no
+ build_old_libs=yes
+ prefer_static_libs=yes
+ break
+ ;;
+ esac
+ done
+
+ # See if our shared archives depend on static archives.
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes
+
+ # Go through the arguments, transforming them on the way.
+ while test $# -gt 0; do
+ arg="$1"
+ shift
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test
+ ;;
+ *) qarg=$arg ;;
+ esac
+ libtool_args="$libtool_args $qarg"
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ case "$prev" in
+ output)
+ compile_command="$compile_command @OUTPUT@"
+ finalize_command="$finalize_command @OUTPUT@"
+ ;;
+ esac
+
+ case "$prev" in
+ dlfiles|dlprefiles)
+ if test "$preload" = no; then
+ # Add the symbol object into the linking commands.
+ compile_command="$compile_command @SYMFILE@"
+ finalize_command="$finalize_command @SYMFILE@"
+ preload=yes
+ fi
+ case "$arg" in
+ *.la | *.lo) ;; # We handle these cases below.
+ force)
+ if test "$dlself" = no; then
+ dlself=needless
+ export_dynamic=yes
+ fi
+ prev=
+ continue
+ ;;
+ self)
+ if test "$prev" = dlprefiles; then
+ dlself=yes
+ elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
+ dlself=yes
+ else
+ dlself=needless
+ export_dynamic=yes
+ fi
+ prev=
+ continue
+ ;;
+ *)
+ if test "$prev" = dlfiles; then
+ dlfiles="$dlfiles $arg"
+ else
+ dlprefiles="$dlprefiles $arg"
+ fi
+ prev=
+ continue
+ ;;
+ esac
+ ;;
+ expsyms)
+ export_symbols="$arg"
+ if test ! -f "$arg"; then
+ $echo "$modename: symbol file \`$arg' does not exist"
+ exit 1
+ fi
+ prev=
+ continue
+ ;;
+ expsyms_regex)
+ export_symbols_regex="$arg"
+ prev=
+ continue
+ ;;
+ release)
+ release="-$arg"
+ prev=
+ continue
+ ;;
+ rpath | xrpath)
+ # We need an absolute path.
+ case "$arg" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ $echo "$modename: only absolute run-paths are allowed" 1>&2
+ exit 1
+ ;;
+ esac
+ if test "$prev" = rpath; then
+ case "$rpath " in
+ *" $arg "*) ;;
+ *) rpath="$rpath $arg" ;;
+ esac
+ else
+ case "$xrpath " in
+ *" $arg "*) ;;
+ *) xrpath="$xrpath $arg" ;;
+ esac
+ fi
+ prev=
+ continue
+ ;;
+ xcompiler)
+ compiler_flags="$compiler_flags $qarg"
+ prev=
+ compile_command="$compile_command $qarg"
+ finalize_command="$finalize_command $qarg"
+ continue
+ ;;
+ xlinker)
+ linker_flags="$linker_flags $qarg"
+ compiler_flags="$compiler_flags $wl$qarg"
+ prev=
+ compile_command="$compile_command $wl$qarg"
+ finalize_command="$finalize_command $wl$qarg"
+ continue
+ ;;
+ *)
+ eval "$prev=\"\$arg\""
+ prev=
+ continue
+ ;;
+ esac
+ fi
+
+ prevarg="$arg"
+
+ case "$arg" in
+ -all-static)
+ if test -n "$link_static_flag"; then
+ compile_command="$compile_command $link_static_flag"
+ finalize_command="$finalize_command $link_static_flag"
+ fi
+ continue
+ ;;
+
+ -allow-undefined)
+ # FIXME: remove this flag sometime in the future.
+ $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2
+ continue
+ ;;
+
+ -avoid-version)
+ avoid_version=yes
+ continue
+ ;;
+
+ -dlopen)
+ prev=dlfiles
+ continue
+ ;;
+
+ -dlpreopen)
+ prev=dlprefiles
+ continue
+ ;;
+
+ -export-dynamic)
+ export_dynamic=yes
+ continue
+ ;;
+
+ -export-symbols | -export-symbols-regex)
+ if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+ $echo "$modename: not more than one -exported-symbols argument allowed"
+ exit 1
+ fi
+ if test "X$arg" = "X-export-symbols"; then
+ prev=expsyms
+ else
+ prev=expsyms_regex
+ fi
+ continue
+ ;;
+
+ -L*)
+ dir=`$echo "X$arg" | $Xsed -e 's/^-L//'`
+ # We need an absolute path.
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ absdir=`cd "$dir" && pwd`
+ if test -z "$absdir"; then
+ $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2
+ exit 1
+ fi
+ dir="$absdir"
+ ;;
+ esac
+ case "$deplibs " in
+ *" -L$dir "*) ;;
+ *)
+ deplibs="$deplibs -L$dir"
+ lib_search_path="$lib_search_path $dir"
+ ;;
+ esac
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ case ":$dllsearchpath:" in
+ *":$dir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$dir";;
+ esac
+ ;;
+ esac
+ continue
+ ;;
+
+ -l*)
+ if test "$arg" = "-lc"; then
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
+ # These systems don't actually have c library (as such)
+ continue
+ ;;
+ esac
+ elif test "$arg" = "-lm"; then
+ case "$host" in
+ *-*-cygwin* | *-*-beos*)
+ # These systems don't actually have math library (as such)
+ continue
+ ;;
+ esac
+ fi
+ deplibs="$deplibs $arg"
+ continue
+ ;;
+
+ -module)
+ module=yes
+ continue
+ ;;
+
+ -no-fast-install)
+ fast_install=no
+ continue
+ ;;
+
+ -no-install)
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ # The PATH hackery in wrapper scripts is required on Windows
+ # in order for the loader to find any dlls it needs.
+ $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2
+ $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2
+ fast_install=no
+ ;;
+ *)
+ no_install=yes
+ ;;
+ esac
+ continue
+ ;;
+
+ -no-undefined)
+ allow_undefined=no
+ continue
+ ;;
+
+ -o) prev=output ;;
+
+ -release)
+ prev=release
+ continue
+ ;;
+
+ -rpath)
+ prev=rpath
+ continue
+ ;;
+
+ -R)
+ prev=xrpath
+ continue
+ ;;
+
+ -R*)
+ dir=`$echo "X$arg" | $Xsed -e 's/^-R//'`
+ # We need an absolute path.
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ $echo "$modename: only absolute run-paths are allowed" 1>&2
+ exit 1
+ ;;
+ esac
+ case "$xrpath " in
+ *" $dir "*) ;;
+ *) xrpath="$xrpath $dir" ;;
+ esac
+ continue
+ ;;
+
+ -static)
+ # If we have no pic_flag, then this is the same as -all-static.
+ if test -z "$pic_flag" && test -n "$link_static_flag"; then
+ compile_command="$compile_command $link_static_flag"
+ finalize_command="$finalize_command $link_static_flag"
+ fi
+ continue
+ ;;
+
+ -thread-safe)
+ thread_safe=yes
+ continue
+ ;;
+
+ -version-info)
+ prev=vinfo
+ continue
+ ;;
+
+ -Wc,*)
+ args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'`
+ arg=
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=','
+ for flag in $args; do
+ IFS="$save_ifs"
+ case "$flag" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ flag="\"$flag\""
+ ;;
+ esac
+ arg="$arg $wl$flag"
+ compiler_flags="$compiler_flags $flag"
+ done
+ IFS="$save_ifs"
+ arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+ ;;
+
+ -Wl,*)
+ args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'`
+ arg=
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=','
+ for flag in $args; do
+ IFS="$save_ifs"
+ case "$flag" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ flag="\"$flag\""
+ ;;
+ esac
+ arg="$arg $wl$flag"
+ compiler_flags="$compiler_flags $wl$flag"
+ linker_flags="$linker_flags $flag"
+ done
+ IFS="$save_ifs"
+ arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+ ;;
+
+ -Xcompiler)
+ prev=xcompiler
+ continue
+ ;;
+
+ -Xlinker)
+ prev=xlinker
+ continue
+ ;;
+
+ # Some other compiler flag.
+ -* | +*)
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ ;;
+
+ *.$objext)
+ # A standard object.
+ objs="$objs $arg"
+ ;;
+
+ *.lo)
+ # A library object.
+ if test "$prev" = dlfiles; then
+ # This file was specified with -dlopen.
+ if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+ dlfiles="$dlfiles $arg"
+ prev=
+ continue
+ else
+ # If libtool objects are unsupported, then we need to preload.
+ prev=dlprefiles
+ fi
+ fi
+
+ if test "$prev" = dlprefiles; then
+ # Preload the old-style object.
+ dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"`
+ prev=
+ else
+ libobjs="$libobjs $arg"
+ fi
+ ;;
+
+ *.$libext)
+ # An archive.
+ deplibs="$deplibs $arg"
+ old_deplibs="$old_deplibs $arg"
+ continue
+ ;;
+
+ *.la)
+ # A libtool-controlled library.
+
+ if test "$prev" = dlfiles; then
+ # This library was specified with -dlopen.
+ dlfiles="$dlfiles $arg"
+ prev=
+ elif test "$prev" = dlprefiles; then
+ # The library was specified with -dlpreopen.
+ dlprefiles="$dlprefiles $arg"
+ prev=
+ else
+ deplibs="$deplibs $arg"
+ fi
+ continue
+ ;;
+
+ # Some other compiler argument.
+ *)
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ ;;
+ esac
+
+ # Now actually substitute the argument into the commands.
+ if test -n "$arg"; then
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ fi
+ done
+
+ if test -n "$prev"; then
+ $echo "$modename: the \`$prevarg' option requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
+ eval arg=\"$export_dynamic_flag_spec\"
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ fi
+
+ oldlibs=
+ # calculate the name of the file, without its directory
+ outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'`
+ libobjs_save="$libobjs"
+
+ if test -n "$shlibpath_var"; then
+ # get the directories listed in $shlibpath_var
+ eval shlib_search_path=\`\$echo \"X \${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\`
+ else
+ shlib_search_path=
+ fi
+ eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
+ eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
+ lib_search_path="$lib_search_path $sys_lib_search_path $shlib_search_path"
+
+ output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$output_objdir" = "X$output"; then
+ output_objdir="$objdir"
+ else
+ output_objdir="$output_objdir/$objdir"
+ fi
+ # Create the object directory.
+ if test ! -d $output_objdir; then
+ $show "$mkdir $output_objdir"
+ $run $mkdir $output_objdir
+ status=$?
+ if test $status -ne 0 && test ! -d $output_objdir; then
+ exit $status
+ fi
+ fi
+
+ case "$output" in
+ "")
+ $echo "$modename: you must specify an output file" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+ *.$libext)
+ linkmode=oldlib ;;
+ *.lo | *.$objext)
+ linkmode=obj ;;
+ *.la)
+ linkmode=lib ;;
+ *) # Anything else should be a program.
+ linkmode=prog ;;
+ esac
+
+ specialdeplibs=
+ libs=
+ # Find all interdependent deplibs that
+ # are linked more than once (e.g. -la -lb -la)
+ for deplib in $deplibs; do
+ case "$libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ libs="$libs $deplib"
+ done
+ deplibs=
+ newdependency_libs=
+ uninst_path= # paths that contain uninstalled libtool libraries
+ new_lib_search_path=
+ need_relink=no # whether we're linking any uninstalled libtool libraries
+ case $linkmode in
+ lib)
+ passes="link"
+ for file in $dlfiles $dlprefiles; do
+ case "$file" in
+ *.la) ;;
+ *)
+ $echo "$modename: libraries can \`-dlopen' only libtool libraries" 1>&2
+ exit 1
+ ;;
+ esac
+ done
+ ;;
+ prog)
+ compile_deplibs=
+ finalize_deplibs=
+ alldeplibs=no
+ newdlfiles=
+ newdlprefiles=
+ link_against_libtool_libs=
+ passes="scan dlopen dlpreopen link"
+ ;;
+ *) passes="link"
+ ;;
+ esac
+ for pass in $passes; do
+ if test $linkmode = prog; then
+ case $pass in
+ dlopen) libs="$dlfiles" ;;
+ dlpreopen) libs="$dlprefiles" ;;
+ link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+ esac
+ fi
+ if test $pass = dlopen; then
+ # Collect dlpreopened libraries
+ save_deplibs="$deplibs"
+ deplibs=
+ fi
+ for deplib in $libs; do
+ lib=
+ found=no
+ case "$deplib" in
+ -l*)
+ if test $linkmode != lib && test $linkmode != prog; then
+ $echo "$modename: warning: \`-l' is ignored for archives/objects" 1>&2
+ continue
+ fi
+ name=`$echo "X$deplib" | $Xsed -e 's/^-l//'`
+ for searchdir in $lib_search_path; do
+ # Search the libtool library
+ lib="$searchdir/lib${name}.la"
+ if test -f "$lib"; then
+ found=yes
+ break
+ fi
+ done
+ if test "$found" != yes; then
+ if test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ deplibs="$deplib $deplibs"
+ test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs"
+ fi
+ continue
+ fi
+ ;;
+ -L*)
+ case $linkmode in
+ lib)
+ deplibs="$deplib $deplibs"
+ newdependency_libs="$deplib $newdependency_libs"
+ new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+ ;;
+ prog)
+ if test $pass = scan; then
+ deplibs="$deplib $deplibs"
+ new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+ else
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ fi
+ ;;
+ *)
+ $echo "$modename: warning: \`-L' is ignored for archives/objects" 1>&2
+ ;;
+ esac
+ continue
+ ;;
+ -R*)
+ if test "$linkmode,$pass" = "prog,link"; then
+ dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'`
+ # Make sure the xrpath contains only unique directories.
+ case "$xrpath " in
+ *" $dir "*) ;;
+ *) xrpath="$xrpath $dir" ;;
+ esac
+ fi
+ continue
+ ;;
+ *.la) lib="$deplib" ;;
+ *.$libext)
+ case $linkmode in
+ lib)
+ if test "$deplibs_check_method" != pass_all; then
+ echo
+ echo "*** Warning: This library needs some functionality provided by $deplib."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ else
+ echo
+ echo "*** Warning: Linking the shared library $output against the"
+ echo "*** static library $deplib is not portable!"
+ deplibs="$deplib $deplibs"
+ fi
+ continue
+ ;;
+ prog)
+ if test $pass != link; then
+ deplibs="$deplib $deplibs"
+ else
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ fi
+ continue
+ ;;
+ esac
+ ;;
+ *.lo | *.$objext)
+ if test $linkmode = prog; then
+ if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+ # If there is no dlopen support or we're linking statically,
+ # we need to preload.
+ newdlprefiles="$newdlprefiles $deplib"
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ newdlfiles="$newdlfiles $deplib"
+ fi
+ fi
+ continue
+ ;;
+ %DEPLIBS%)
+ alldeplibs=yes
+ continue
+ ;;
+ esac
+ if test $found = yes || test -f "$lib"; then :
+ else
+ $echo "$modename: cannot find the library \`$lib'" 1>&2
+ exit 1
+ fi
+
+ # Check to see that this really is a libtool archive.
+ if (sed -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+
+ ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$ladir" = "X$lib" && ladir="."
+
+ dlname=
+ dlopen=
+ dlpreopen=
+ libdir=
+ library_names=
+ old_library=
+ # If the library was installed with an old release of libtool,
+ # it will not redefine variable installed.
+ installed=yes
+
+ # Read the .la file
+ case "$lib" in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+
+ if test $linkmode = lib || test "$linkmode,$pass" = "prog,scan"; then
+ test -n "$dlopen" && dlfiles="$dlfiles $dlopen"
+ test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen"
+ fi
+
+ if test $linkmode != lib && test $linkmode != prog; then
+ # only check for convenience libraries
+ if test -z "$old_library"; then
+ $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+ exit 1
+ fi
+ if test -n "$libdir"; then
+ $echo "$modename: \`$lib' is not a convenience library" 1>&2
+ exit 1
+ fi
+ # It is a libtool convenience library, so add in its objects.
+ convenience="$convenience $ladir/$objdir/$old_library"
+ old_convenience="$old_convenience $ladir/$objdir/$old_library"
+ continue
+ fi
+
+ # Get the name of the library we link against.
+ linklib=
+ for l in $old_library $library_names; do
+ linklib="$l"
+ done
+ if test -z "$linklib"; then
+ $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+ exit 1
+ fi
+
+ # This library was specified with -dlopen.
+ if test $pass = dlopen; then
+ if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+ # If there is no dlname, no dlopen support or we're linking statically,
+ # we need to preload.
+ dlprefiles="$dlprefiles $lib"
+ else
+ newdlfiles="$newdlfiles $lib"
+ fi
+ continue
+ fi
+
+ # We need an absolute path.
+ case "$ladir" in
+ [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
+ *)
+ abs_ladir=`cd "$ladir" && pwd`
+ if test -z "$abs_ladir"; then
+ $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2
+ $echo "$modename: passing it literally to the linker, although it might fail" 1>&2
+ abs_ladir="$ladir"
+ fi
+ ;;
+ esac
+ laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+
+ # Find the relevant object directory and library name.
+ if test "X$installed" = Xyes; then
+ if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+ $echo "$modename: warning: library \`$lib' was moved." 1>&2
+ dir="$ladir"
+ absdir="$abs_ladir"
+ libdir="$abs_ladir"
+ else
+ dir="$libdir"
+ absdir="$libdir"
+ fi
+ else
+ dir="$ladir/$objdir"
+ absdir="$abs_ladir/$objdir"
+ # Remove this search path later
+ uninst_path="$uninst_path $abs_ladir"
+ fi
+ name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+
+ # This library was specified with -dlpreopen.
+ if test $pass = dlpreopen; then
+ # Prefer using a static library (so that no silly _DYNAMIC symbols
+ # are required to link).
+ if test -n "$old_library"; then
+ newdlprefiles="$newdlprefiles $dir/$old_library"
+ else
+ newdlprefiles="$newdlprefiles $dir/$linklib"
+ fi
+ fi
+
+ if test $linkmode = prog && test $pass != link; then
+ new_lib_search_path="$new_lib_search_path $ladir"
+ deplibs="$lib $deplibs"
+
+ linkalldeplibs=no
+ if test "$link_all_deplibs" != no || test "$fast_install" != no || \
+ test "$build_libtool_libs" = no || test -z "$library_names"; then
+ linkalldeplibs=yes
+ fi
+
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ case "$deplib" in
+ -L*) new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test
+ esac
+ # Need to link against all dependency_libs?
+ if test $linkalldeplibs = yes; then
+ deplibs="$deplib $deplibs"
+ else
+ # Need to hardcode shared library paths
+ # or/and link against static libraries
+ newdependency_libs="$deplib $newdependency_libs"
+ fi
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ tmp_libs="$tmp_libs $deplib"
+ done
+ continue
+ fi
+
+ if test -z "$libdir"; then
+ # It is a libtool convenience library, so add in its objects.
+ convenience="$convenience $dir/$old_library"
+ old_convenience="$old_convenience $dir/$old_library"
+ if test $linkmode = lib; then
+ deplibs="$dir/$old_library $deplibs"
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ newdependency_libs="$deplib $newdependency_libs"
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ tmp_libs="$tmp_libs $deplib"
+ done
+ elif test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$dir/$old_library $compile_deplibs"
+ finalize_deplibs="$dir/$old_library $finalize_deplibs"
+ fi
+ continue
+ fi
+
+ if test "$linkmode,$pass" = "prog,link"; then
+ if test -n "$library_names" &&
+ { test "$hardcode_into_libs" != all || test "$alldeplibs" != yes; } &&
+ { test "$prefer_static_libs" = no || test -z "$old_library"; }; then
+ # We need to hardcode the library path
+ if test -n "$shlibpath_var"; then
+ # Make sure the rpath contains only unique directories.
+ case "$temp_rpath " in
+ *" $dir "*) ;;
+ *" $absdir "*) ;;
+ *) temp_rpath="$temp_rpath $dir" ;;
+ esac
+ fi
+
+ # Hardcode the library path.
+ # Skip directories that are in the system default run-time
+ # search path.
+ case " $sys_lib_dlsearch_path " in
+ *" $absdir "*) ;;
+ *)
+ case "$compile_rpath " in
+ *" $absdir "*) ;;
+ *) compile_rpath="$compile_rpath $absdir"
+ esac
+ ;;
+ esac
+
+ case " $sys_lib_dlsearch_path " in
+ *" $libdir "*) ;;
+ *)
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir"
+ esac
+ ;;
+ esac
+ fi
+
+ if test "$alldeplibs" = yes &&
+ { test "$deplibs_check_method" = pass_all ||
+ { test "$build_libtool_libs" = yes &&
+ test -n "$library_names"; }; }; then
+ # Do we only need to link against static libraries?
+ continue
+ fi
+ fi
+
+ link_static=no # Whether this library is linked statically
+ if test -n "$library_names" &&
+ { test "$prefer_static_libs" = no || test -z "$old_library"; }; then
+ link_against_libtool_libs="$link_against_libtool_libs $lib"
+ test "X$installed" = xno && need_relink=yes
+ # This is a shared library
+ if test $linkmode = lib && test "$hardcode_into_libs" = all; then
+ # Hardcode the library path.
+ # Skip directories that are in the system default run-time
+ # search path.
+ case " $sys_lib_dlsearch_path " in
+ *" $absdir "*) ;;
+ *)
+ case "$compile_rpath " in
+ *" $absdir "*) ;;
+ *) compile_rpath="$compile_rpath $absdir"
+ esac
+ ;;
+ esac
+ case " $sys_lib_dlsearch_path " in
+ *" $libdir "*) ;;
+ *)
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir"
+ esac
+ ;;
+ esac
+ fi
+
+ if test -n "$old_archive_from_expsyms_cmds"; then
+ # figure out the soname
+ set dummy $library_names
+ realname="$2"
+ shift; shift
+ libname=`eval \\$echo \"$libname_spec\"`
+ if test -n "$soname_spec"; then
+ eval soname=\"$soname_spec\"
+ else
+ soname="$realname"
+ fi
+
+ # Make a new name for the extract_expsyms_cmds to use
+ newlib="libimp-`echo $soname | sed 's/^lib//;s/\.dll$//'`.a"
+
+ # If the library has no export list, then create one now
+ if test -f "$output_objdir/$soname-def"; then :
+ else
+ $show "extracting exported symbol list from \`$soname'"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ eval cmds=\"$extract_expsyms_cmds\"
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ # Create $newlib
+ if test -f "$output_objdir/$newlib"; then :; else
+ $show "generating import library for \`$soname'"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ eval cmds=\"$old_archive_from_expsyms_cmds\"
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+ # make sure the library variables are pointing to the new library
+ dir=$output_objdir
+ linklib=$newlib
+ fi
+
+ if test $linkmode = prog || test "$mode" != relink; then
+ add_shlibpath=
+ add_dir=
+ add=
+ lib_linked=yes
+ case "$hardcode_action" in
+ immediate | unsupported)
+ if test "$hardcode_direct" = no; then
+ add="$dir/$linklib"
+ elif test "$hardcode_minus_L" = no; then
+ case "$host" in
+ *-*-sunos*) add_shlibpath="$dir" ;;
+ esac
+ add_dir="-L$dir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = no; then
+ add_shlibpath="$dir"
+ add="-l$name"
+ else
+ lib_linked=no
+ fi
+ ;;
+ relink)
+ if test "$hardcode_direct" = yes; then
+ add="$dir/$linklib"
+ elif test "$hardcode_minus_L" = yes; then
+ add_dir="-L$dir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = yes; then
+ add_shlibpath="$dir"
+ add="-l$name"
+ else
+ lib_linked=no
+ fi
+ ;;
+ *) lib_linked=no ;;
+ esac
+
+ if test "$lib_linked" != yes; then
+ $echo "$modename: configuration error: unsupported hardcode properties"
+ exit 1
+ fi
+
+ if test -n "$add_shlibpath"; then
+ case ":$compile_shlibpath:" in
+ *":$add_shlibpath:"*) ;;
+ *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;;
+ esac
+ fi
+ if test $linkmode = prog; then
+ test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
+ test -n "$add" && compile_deplibs="$add $compile_deplibs"
+ else
+ test -n "$add_dir" && deplibs="$add_dir $deplibs"
+ test -n "$add" && deplibs="$add $deplibs"
+ if test "$hardcode_direct" != yes && \
+ test "$hardcode_minus_L" != yes && \
+ test "$hardcode_shlibpath_var" = yes; then
+ case ":$finalize_shlibpath:" in
+ *":$libdir:"*) ;;
+ *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+ esac
+ fi
+ fi
+ fi
+
+ if test $linkmode = prog || test "$mode" = relink; then
+ add_shlibpath=
+ add_dir=
+ add=
+ # Finalize command for both is simple: just hardcode it.
+ if test "$hardcode_direct" = yes; then
+ add="$libdir/$linklib"
+ elif test "$hardcode_minus_L" = yes; then
+ add_dir="-L$libdir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = yes; then
+ case ":$finalize_shlibpath:" in
+ *":$libdir:"*) ;;
+ *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+ esac
+ add="-l$name"
+ else
+ # We cannot seem to hardcode it, guess we'll fake it.
+ add_dir="-L$libdir"
+ add="-l$name"
+ fi
+
+ if test $linkmode = prog; then
+ test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+ test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+ else
+ test -n "$add_dir" && deplibs="$add_dir $deplibs"
+ test -n "$add" && deplibs="$add deplibs"
+ fi
+ fi
+ elif test $linkmode = prog; then
+ # Here we assume that one of hardcode_direct or hardcode_minus_L
+ # is not unsupported. This is valid on all known static and
+ # shared platforms.
+ if test "$hardcode_direct" != unsupported; then
+ test -n "$old_library" && linklib="$old_library"
+ compile_deplibs="$dir/$linklib $compile_deplibs"
+ finalize_deplibs="$dir/$linklib $finalize_deplibs"
+ else
+ compile_deplibs="-l$name -L$dir $compile_deplibs"
+ finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+ fi
+ elif test "$build_libtool_libs" = yes; then
+ # Not a shared library
+ if test "$deplibs_check_method" != pass_all; then
+ # We're trying link a shared library against a static one
+ # but the system doesn't support it.
+ # Just print a warning and add the library to dependency_libs so
+ # that the program can be linked against the static library.
+ echo
+ echo "*** Warning: This library needs some functionality provided by $lib."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ else
+ convenience="$convenience $dir/$old_library"
+ old_convenience="$old_convenience $dir/$old_library"
+ deplibs="$dir/$old_library $deplibs"
+ link_static=yes
+ fi
+ fi
+
+ if test $linkmode = lib; then
+ if test -n "$dependency_libs" &&
+ { test "$hardcode_into_libs" = no || test $build_old_libs = yes ||
+ test $link_static = yes; }; then
+ # Extract -R from dependency_libs
+ temp_deplibs=
+ for libdir in $dependency_libs; do
+ case "$libdir" in
+ -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'`
+ case " $xrpath " in
+ *" $temp_xrpath "*) ;;
+ *) xrpath="$xrpath $temp_xrpath";;
+ esac;;
+ *) temp_deplibs="$temp_deplibs $libdir";;
+ esac
+ done
+ dependency_libs="$temp_deplibs"
+ fi
+
+ new_lib_search_path="$new_lib_search_path $absdir"
+ # Link against this library
+ test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+ # ... and its dependency_libs
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ newdependency_libs="$deplib $newdependency_libs"
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ tmp_libs="$tmp_libs $deplib"
+ done
+
+ if test $link_all_deplibs != no; then
+ # Add the search paths of all dependency libraries
+ for deplib in $dependency_libs; do
+ case "$deplib" in
+ -L*) path="$deplib" ;;
+ *.la)
+ dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$deplib" && dir="."
+ # We need an absolute path.
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
+ *)
+ absdir=`cd "$dir" && pwd`
+ if test -z "$absdir"; then
+ $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2
+ absdir="$dir"
+ fi
+ ;;
+ esac
+ if grep "^installed=no" $deplib > /dev/null; then
+ path="-L$absdir/$objdir"
+ else
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ if test "$absdir" != "$libdir"; then
+ $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2
+ fi
+ path="-L$absdir"
+ fi
+ ;;
+ *) continue ;;
+ esac
+ case " $deplibs " in
+ *" $path "*) ;;
+ *) deplibs="$deplibs $path" ;;
+ esac
+ done
+ fi
+ fi
+ done
+ dependency_libs="$newdependency_libs"
+ if test $pass = dlpreopen; then
+ # Link the dlpreopened libraries before other libraries
+ deplibs="$deplibs $save_deplibs"
+ elif test $pass != dlopen; then
+ # Make sure lib_search_path contains only unique directories.
+ lib_search_path=
+ for dir in $new_lib_search_path; do
+ case "$lib_search_path " in
+ *" $dir "*) ;;
+ *) lib_search_path="$lib_search_path $dir" ;;
+ esac
+ done
+ lib_search_path="$lib_search_path $sys_lib_search_path"
+
+ if test "$linkmode,$pass" != "prog,link"; then
+ vars="deplibs"
+ else
+ vars="compile_deplibs finalize_deplibs"
+ fi
+ for var in $vars dependency_libs; do
+ # Make sure that $var contains only unique libraries
+ # and add them in reverse order
+ eval tmp_libs=\"\$$var\"
+ new_libs=
+ for deplib in $tmp_libs; do
+ case "$deplib" in
+ -L*) new_libs="$deplib $new_libs" ;;
+ *)
+ case " $specialdeplibs " in
+ *" $deplib "*) new_libs="$deplib $new_libs" ;;
+ *)
+ case " $new_libs " in
+ *" $deplib "*) ;;
+ *) new_libs="$deplib $new_libs" ;;
+ esac
+ ;;
+ esac
+ ;;
+ esac
+ done
+ tmp_libs=
+ for deplib in $new_libs; do
+ case "$deplib" in
+ -L*)
+ case " $tmp_libs " in
+ *" $deplib "*) ;;
+ *) tmp_libs="$tmp_libs $deplib" ;;
+ esac
+ ;;
+ *) tmp_libs="$tmp_libs $deplib" ;;
+ esac
+ done
+ eval $var=\"$tmp_libs\"
+ done
+ fi
+ done
+ if test $linkmode = prog; then
+ dlfiles="$newdlfiles"
+ dlprefiles="$newdlprefiles"
+ fi
+
+ case $linkmode in
+ oldlib)
+ if test -n "$deplibs"; then
+ $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2
+ fi
+
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$rpath"; then
+ $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$xrpath"; then
+ $echo "$modename: warning: \`-R' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+ $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2
+ fi
+
+ # Now set the variables for building old libraries.
+ build_libtool_libs=no
+ oldlibs="$output"
+ objs="$objs$old_deplibs"
+ ;;
+
+ lib)
+ # Make sure we only generate libraries of the form `libNAME.la'.
+ case "$outputname" in
+ lib*)
+ name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+ eval libname=\"$libname_spec\"
+ ;;
+ *)
+ if test "$module" = no; then
+ $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+ if test "$need_lib_prefix" != no; then
+ # Add the "lib" prefix for modules if required
+ name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+ eval libname=\"$libname_spec\"
+ else
+ libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+ fi
+ ;;
+ esac
+
+ if test -n "$objs"; then
+ if test "$deplibs_check_method" != pass_all; then
+ $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1
+ exit 1
+ else
+ echo
+ echo "*** Warning: Linking the shared library $output against the non-libtool"
+ echo "*** objects $objs is not portable!"
+ libobjs="$libobjs $objs"
+ fi
+ fi
+
+ if test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2
+ fi
+
+ set dummy $rpath
+ if test $# -gt 2; then
+ $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2
+ fi
+ install_libdir="$2"
+
+ oldlibs=
+ if test -z "$rpath"; then
+ if test "$build_libtool_libs" = yes; then
+ # Building a libtool convenience library.
+ libext=al
+ oldlibs="$output_objdir/$libname.$libext $oldlibs"
+ build_libtool_libs=convenience
+ build_old_libs=yes
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2
+ fi
+ else
+
+ # Parse the version information argument.
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=':'
+ set dummy $vinfo 0 0 0
+ IFS="$save_ifs"
+
+ if test -n "$8"; then
+ $echo "$modename: too many parameters to \`-version-info'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ current="$2"
+ revision="$3"
+ age="$4"
+
+ # Check that each of the things are valid numbers.
+ case "$current" in
+ 0 | [1-9] | [1-9][0-9]*) ;;
+ *)
+ $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ ;;
+ esac
+
+ case "$revision" in
+ 0 | [1-9] | [1-9][0-9]*) ;;
+ *)
+ $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ ;;
+ esac
+
+ case "$age" in
+ 0 | [1-9] | [1-9][0-9]*) ;;
+ *)
+ $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ ;;
+ esac
+
+ if test $age -gt $current; then
+ $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ fi
+
+ # Calculate the version variables.
+ major=
+ versuffix=
+ verstring=
+ case "$version_type" in
+ none) ;;
+
+ irix)
+ major=`expr $current - $age + 1`
+ versuffix="$major.$revision"
+ verstring="sgi$major.$revision"
+
+ # Add in all the interfaces that we are compatible with.
+ loop=$revision
+ while test $loop != 0; do
+ iface=`expr $revision - $loop`
+ loop=`expr $loop - 1`
+ verstring="sgi$major.$iface:$verstring"
+ done
+ ;;
+
+ linux)
+ major=.`expr $current - $age`
+ versuffix="$major.$age.$revision"
+ ;;
+
+ osf)
+ major=`expr $current - $age`
+ versuffix=".$current.$age.$revision"
+ verstring="$current.$age.$revision"
+
+ # Add in all the interfaces that we are compatible with.
+ loop=$age
+ while test $loop != 0; do
+ iface=`expr $current - $loop`
+ loop=`expr $loop - 1`
+ verstring="$verstring:${iface}.0"
+ done
+
+ # Make executables depend on our current version.
+ verstring="$verstring:${current}.0"
+ ;;
+
+ sunos)
+ major=".$current"
+ versuffix=".$current.$revision"
+ ;;
+
+ freebsd-aout)
+ major=".$current"
+ versuffix=".$current.$revision";
+ ;;
+
+ freebsd-elf)
+ major=".$current"
+ versuffix=".$current";
+ ;;
+
+ windows)
+ # Like Linux, but with '-' rather than '.', since we only
+ # want one extension on Windows 95.
+ major=`expr $current - $age`
+ versuffix="-$major-$age-$revision"
+ ;;
+
+ *)
+ $echo "$modename: unknown library version type \`$version_type'" 1>&2
+ echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit 1
+ ;;
+ esac
+
+ # Clear the version info if we defaulted, and they specified a release.
+ if test -z "$vinfo" && test -n "$release"; then
+ major=
+ verstring="0.0"
+ if test "$need_version" = no; then
+ versuffix=
+ else
+ versuffix=".0.0"
+ fi
+ fi
+
+ # Remove version info from name if versioning should be avoided
+ if test "$avoid_version" = yes && test "$need_version" = no; then
+ major=
+ versuffix=
+ verstring=""
+ fi
+
+ # Check to see if the archive will have undefined symbols.
+ if test "$allow_undefined" = yes; then
+ if test "$allow_undefined_flag" = unsupported; then
+ $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2
+ build_libtool_libs=no
+ build_old_libs=yes
+ fi
+ else
+ # Don't allow undefined symbols.
+ allow_undefined_flag="$no_undefined_flag"
+ fi
+ fi
+
+ if test "$mode" != relink; then
+ # Remove our outputs.
+ $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*"
+ $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*
+ fi
+
+ # Now set the variables for building old libraries.
+ if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
+ oldlibs="$oldlibs $output_objdir/$libname.$libext"
+
+ # Transform .lo files to .o files.
+ oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP`
+ fi
+
+ # Eliminate all temporary directories.
+ for path in $uninst_path; do
+ lib_search_path=`echo "$lib_search_path " | sed -e 's% $path % %g'`
+ deplibs=`echo "$deplibs " | sed -e 's% -L$path % %g'`
+ dependency_libs=`echo "$dependency_libs " | sed -e 's% -L$path % %g'`
+ done
+
+ if test -n "$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ temp_xrpath=
+ for libdir in $xrpath; do
+ temp_xrpath="$temp_xrpath -R$libdir"
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ if test "$hardcode_into_libs" = no || test $build_old_libs = yes; then
+ dependency_libs="$temp_xrpath $dependency_libs"
+ fi
+ fi
+
+ # Make sure dlfiles contains only unique files that won't be dlpreopened
+ old_dlfiles="$dlfiles"
+ dlfiles=
+ for lib in $old_dlfiles; do
+ case " $dlprefiles $dlfiles " in
+ *" $lib "*) ;;
+ *) dlfiles="$dlfiles $lib" ;;
+ esac
+ done
+
+ # Make sure dlprefiles contains only unique files
+ old_dlprefiles="$dlprefiles"
+ dlprefiles=
+ for lib in $old_dlprefiles; do
+ case "$dlprefiles " in
+ *" $lib "*) ;;
+ *) dlprefiles="$dlprefiles $lib" ;;
+ esac
+ done
+
+ if test "$build_libtool_libs" = yes; then
+ if test -n "$rpath"; then
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
+ # these systems don't actually have a c library (as such)!
+ ;;
+ *)
+ # Add libc to deplibs on all other systems.
+ deplibs="$deplibs -lc"
+ ;;
+ esac
+ fi
+
+ # Transform deplibs into only deplibs that can be linked in shared.
+ name_save=$name
+ libname_save=$libname
+ release_save=$release
+ versuffix_save=$versuffix
+ major_save=$major
+ # I'm not sure if I'm treating the release correctly. I think
+ # release should show up in the -l (ie -lgmp5) so we don't want to
+ # add it in twice. Is that correct?
+ release=""
+ versuffix=""
+ major=""
+ newdeplibs=
+ droppeddeps=no
+ case "$deplibs_check_method" in
+ pass_all)
+ # Don't check for shared/static. Everything works.
+ # This might be a little naive. We might want to check
+ # whether the library exists or not. But this is on
+ # osf3 & osf4 and I'm not really sure... Just
+ # implementing what was already the behaviour.
+ newdeplibs=$deplibs
+ ;;
+ test_compile)
+ # This code stresses the "libraries are programs" paradigm to its
+ # limits. Maybe even breaks it. We compile a program, linking it
+ # against the deplibs as a proxy for the library. Then we can check
+ # whether they linked in statically or dynamically with ldd.
+ $rm conftest.c
+ cat > conftest.c <<EOF
+ int main() { return 0; }
+EOF
+ $rm conftest
+ $CC -o conftest conftest.c $deplibs
+ if test $? -eq 0 ; then
+ ldd_output=`ldd conftest`
+ for i in $deplibs; do
+ name="`expr $i : '-l\(.*\)'`"
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ deplib_matches=`eval \\$echo \"$library_names_spec\"`
+ set dummy $deplib_matches
+ deplib_match=$2
+ if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+ newdeplibs="$newdeplibs $i"
+ else
+ droppeddeps=yes
+ echo
+ echo "*** Warning: This library needs some functionality provided by $i."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ fi
+ else
+ newdeplibs="$newdeplibs $i"
+ fi
+ done
+ else
+ # Error occured in the first compile. Let's try to salvage the situation:
+ # Compile a seperate program for each library.
+ for i in $deplibs; do
+ name="`expr $i : '-l\(.*\)'`"
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" ; then
+ $rm conftest
+ $CC -o conftest conftest.c $i
+ # Did it work?
+ if test $? -eq 0 ; then
+ ldd_output=`ldd conftest`
+ libname=`eval \\$echo \"$libname_spec\"`
+ deplib_matches=`eval \\$echo \"$library_names_spec\"`
+ set dummy $deplib_matches
+ deplib_match=$2
+ if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+ newdeplibs="$newdeplibs $i"
+ else
+ droppeddeps=yes
+ echo
+ echo "*** Warning: This library needs some functionality provided by $i."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ fi
+ else
+ droppeddeps=yes
+ echo
+ echo "*** Warning! Library $i is needed by this library but I was not able to"
+ echo "*** make it link in! You will probably need to install it or some"
+ echo "*** library that it depends on before this library will be fully"
+ echo "*** functional. Installing it before continuing would be even better."
+ fi
+ else
+ newdeplibs="$newdeplibs $i"
+ fi
+ done
+ fi
+ ;;
+ file_magic*)
+ set dummy $deplibs_check_method
+ file_magic_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+ for a_deplib in $deplibs; do
+ name="`expr $a_deplib : '-l\(.*\)'`"
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ for i in $lib_search_path; do
+ potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+ for potent_lib in $potential_libs; do
+ # Follow soft links.
+ if ls -lLd "$potent_lib" 2>/dev/null \
+ | grep " -> " >/dev/null; then
+ continue
+ fi
+ # The statement above tries to avoid entering an
+ # endless loop below, in case of cyclic links.
+ # We might still enter an endless loop, since a link
+ # loop can be closed while we follow links,
+ # but so what?
+ potlib="$potent_lib"
+ while test -h "$potlib" 2>/dev/null; do
+ potliblink=`ls -ld $potlib | sed 's/.* -> //'`
+ case "$potliblink" in
+ [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
+ *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";;
+ esac
+ done
+ if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \
+ | sed 10q \
+ | egrep "$file_magic_regex" > /dev/null; then
+ newdeplibs="$newdeplibs $a_deplib"
+ a_deplib=""
+ break 2
+ fi
+ done
+ done
+ if test -n "$a_deplib" ; then
+ droppeddeps=yes
+ echo
+ echo "*** Warning: This library needs some functionality provided by $a_deplib."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ fi
+ else
+ # Add a -L argument.
+ newdeplibs="$newdeplibs $a_deplib"
+ fi
+ done # Gone through all deplibs.
+ ;;
+ none | unknown | *)
+ newdeplibs=""
+ if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \
+ -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' |
+ grep . >/dev/null; then
+ echo
+ if test "X$deplibs_check_method" = "Xnone"; then
+ echo "*** Warning: inter-library dependencies are not supported in this platform."
+ else
+ echo "*** Warning: inter-library dependencies are not known to be supported."
+ fi
+ echo "*** All declared inter-library dependencies are being dropped."
+ droppeddeps=yes
+ fi
+ ;;
+ esac
+ versuffix=$versuffix_save
+ major=$major_save
+ release=$release_save
+ libname=$libname_save
+ name=$name_save
+
+ if test "$droppeddeps" = yes; then
+ if test "$module" = yes; then
+ echo
+ echo "*** Warning: libtool could not satisfy all declared inter-library"
+ echo "*** dependencies of module $libname. Therefore, libtool will create"
+ echo "*** a static module, that should work as long as the dlopening"
+ echo "*** application is linked with the -dlopen flag."
+ if test -z "$global_symbol_pipe"; then
+ echo
+ echo "*** However, this would only work if libtool was able to extract symbol"
+ echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+ echo "*** not find such a program. So, this module is probably useless."
+ echo "*** \`nm' from GNU binutils and a full rebuild may help."
+ fi
+ if test "$build_old_libs" = no; then
+ oldlibs="$output_objdir/$libname.$libext"
+ build_libtool_libs=module
+ build_old_libs=yes
+ else
+ build_libtool_libs=no
+ fi
+ else
+ echo "*** The inter-library dependencies that have been dropped here will be"
+ echo "*** automatically added whenever a program is linked with this library"
+ echo "*** or is declared to -dlopen it."
+ fi
+ fi
+ # Done checking deplibs!
+ deplibs=$newdeplibs
+ fi
+
+ # All the library-specific variables (install_libdir is set above).
+ library_names=
+ old_library=
+ dlname=
+
+ # Test again, we may have decided not to build it any more
+ if test "$build_libtool_libs" = yes; then
+ if test "$hardcode_into_libs" != no; then
+ # Hardcode the library paths
+ hardcode_libdirs=
+ dep_rpath=
+ rpath="$finalize_rpath"
+ test "$mode" != relink && rpath="$compile_rpath$rpath"
+ for libdir in $rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ dep_rpath="$dep_rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$perm_rpath " in
+ *" $libdir "*) ;;
+ *) perm_rpath="$perm_rpath $libdir" ;;
+ esac
+ fi
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval dep_rpath=\"$hardcode_libdir_flag_spec\"
+ fi
+ if test -n "$runpath_var" && test -n "$perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
+ fi
+ test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
+ fi
+
+ shlibpath="$finalize_shlibpath"
+ test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
+ if test -n "$shlibpath"; then
+ eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
+ fi
+
+ # Get the real and link names of the library.
+ eval library_names=\"$library_names_spec\"
+ set dummy $library_names
+ realname="$2"
+ shift; shift
+
+ if test -n "$soname_spec"; then
+ eval soname=\"$soname_spec\"
+ else
+ soname="$realname"
+ fi
+
+ lib="$output_objdir/$realname"
+ for link
+ do
+ linknames="$linknames $link"
+ done
+
+ # Ensure that we have .o objects for linkers which dislike .lo
+ # (e.g. aix) in case we are running --disable-static
+ for obj in $libobjs; do
+ xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$obj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'`
+ oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"`
+ if test ! -f $xdir/$oldobj; then
+ $show "(cd $xdir && ${LN_S} $baseobj $oldobj)"
+ $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $?
+ fi
+ done
+
+ # Use standard objects if they are pic
+ test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+
+ # Prepare the list of exported symbols
+ if test -z "$export_symbols"; then
+ if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
+ $show "generating symbol list for \`$libname.la'"
+ export_symbols="$output_objdir/$libname.exp"
+ $run $rm $export_symbols
+ eval cmds=\"$export_symbols_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ if test -n "$export_symbols_regex"; then
+ $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\""
+ $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+ $show "$mv \"${export_symbols}T\" \"$export_symbols\""
+ $run eval '$mv "${export_symbols}T" "$export_symbols"'
+ fi
+ fi
+ fi
+
+ if test -n "$export_symbols" && test -n "$include_expsyms"; then
+ $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"'
+ fi
+
+ if test -n "$convenience"; then
+ if test -n "$whole_archive_flag_spec"; then
+ eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+ else
+ gentop="$output_objdir/${outputname}x"
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "mkdir $gentop"
+ $run mkdir "$gentop"
+ status=$?
+ if test $status -ne 0 && test ! -d "$gentop"; then
+ exit $status
+ fi
+ generated="$generated $gentop"
+
+ for xlib in $convenience; do
+ # Extract the objects.
+ case "$xlib" in
+ [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+ *) xabs=`pwd`"/$xlib" ;;
+ esac
+ xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+ xdir="$gentop/$xlib"
+
+ $show "${rm}r $xdir"
+ $run ${rm}r "$xdir"
+ $show "mkdir $xdir"
+ $run mkdir "$xdir"
+ status=$?
+ if test $status -ne 0 && test ! -d "$xdir"; then
+ exit $status
+ fi
+ $show "(cd $xdir && $AR x $xabs)"
+ $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+ libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP`
+ done
+ fi
+ fi
+
+ if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
+ eval flag=\"$thread_safe_flag_spec\"
+ linker_flags="$linker_flags $flag"
+ fi
+
+ # Make a backup of the uninstalled library when relinking
+ if test "$mode" = relink && test "$hardcode_into_libs" = all; then
+ $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $?
+ fi
+
+ # Do each of the archive commands.
+ if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+ eval cmds=\"$archive_expsym_cmds\"
+ else
+ eval cmds=\"$archive_cmds\"
+ fi
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+
+ # Restore the uninstalled library and exit
+ if test "$mode" = relink && test "$hardcode_into_libs" = all; then
+ $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $?
+ exit 0
+ fi
+
+ # Create links to the real library.
+ for linkname in $linknames; do
+ if test "$realname" != "$linkname"; then
+ $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)"
+ $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $?
+ fi
+ done
+
+ # If -module or -export-dynamic was specified, set the dlname.
+ if test "$module" = yes || test "$export_dynamic" = yes; then
+ # On all known operating systems, these are identical.
+ dlname="$soname"
+ fi
+ fi
+ ;;
+
+ obj)
+ if test -n "$deplibs"; then
+ $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2
+ fi
+
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$rpath"; then
+ $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$xrpath"; then
+ $echo "$modename: warning: \`-R' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for objects" 1>&2
+ fi
+
+ case "$output" in
+ *.lo)
+ if test -n "$objs$old_deplibs"; then
+ $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2
+ exit 1
+ fi
+ libobj="$output"
+ obj=`$echo "X$output" | $Xsed -e "$lo2o"`
+ ;;
+ *)
+ libobj=
+ obj="$output"
+ ;;
+ esac
+
+ # Delete the old objects.
+ $run $rm $obj $libobj
+
+ # Objects from convenience libraries. This assumes
+ # single-version convenience libraries. Whenever we create
+ # different ones for PIC/non-PIC, this we'll have to duplicate
+ # the extraction.
+ reload_conv_objs=
+ gentop=
+ # reload_cmds runs $LD directly, so let us get rid of
+ # -Wl from whole_archive_flag_spec
+ wl=
+
+ if test -n "$convenience"; then
+ if test -n "$whole_archive_flag_spec"; then
+ eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\"
+ else
+ gentop="$output_objdir/${obj}x"
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "mkdir $gentop"
+ $run mkdir "$gentop"
+ status=$?
+ if test $status -ne 0 && test ! -d "$gentop"; then
+ exit $status
+ fi
+ generated="$generated $gentop"
+
+ for xlib in $convenience; do
+ # Extract the objects.
+ case "$xlib" in
+ [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+ *) xabs=`pwd`"/$xlib" ;;
+ esac
+ xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+ xdir="$gentop/$xlib"
+
+ $show "${rm}r $xdir"
+ $run ${rm}r "$xdir"
+ $show "mkdir $xdir"
+ $run mkdir "$xdir"
+ status=$?
+ if test $status -ne 0 && test ! -d "$xdir"; then
+ exit $status
+ fi
+ $show "(cd $xdir && $AR x $xabs)"
+ $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+ reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP`
+ done
+ fi
+ fi
+
+ # Create the old-style object.
+ reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
+
+ output="$obj"
+ eval cmds=\"$reload_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+
+ # Exit if we aren't doing a library object file.
+ if test -z "$libobj"; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit 0
+ fi
+
+ if test "$build_libtool_libs" != yes; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ # Create an invalid libtool object if no PIC, so that we don't
+ # accidentally link it into a program.
+ $show "echo timestamp > $libobj"
+ $run eval "echo timestamp > $libobj" || exit $?
+ exit 0
+ fi
+
+ if test -n "$pic_flag" || test "$pic_mode" != default; then
+ # Only do commands if we really have different PIC objects.
+ reload_objs="$libobjs $reload_conv_objs"
+ output="$libobj"
+ eval cmds=\"$reload_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ else
+ # Just create a symlink.
+ $show $rm $libobj
+ $run $rm $libobj
+ xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$libobj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'`
+ oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"`
+ $show "(cd $xdir && $LN_S $oldobj $baseobj)"
+ $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $?
+ fi
+
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit 0
+ ;;
+
+ prog)
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for programs" 1>&2
+ fi
+
+ if test "$preload" = yes; then
+ if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown &&
+ test "$dlopen_self_static" = unknown; then
+ $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support."
+ fi
+ fi
+
+ compile_command="$compile_command $compile_deplibs"
+ finalize_command="$finalize_command $finalize_deplibs"
+
+ if test -n "$rpath$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ for libdir in $rpath $xrpath; do
+ # This is the magic to use -rpath.
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ fi
+
+ # Now hardcode the library paths
+ rpath=
+ hardcode_libdirs=
+ for libdir in $compile_rpath $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ rpath="$rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$perm_rpath " in
+ *" $libdir "*) ;;
+ *) perm_rpath="$perm_rpath $libdir" ;;
+ esac
+ fi
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ case ":$dllsearchpath:" in
+ *":$libdir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$libdir";;
+ esac
+ ;;
+ esac
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval rpath=\" $hardcode_libdir_flag_spec\"
+ fi
+ compile_rpath="$rpath"
+
+ rpath=
+ hardcode_libdirs=
+ for libdir in $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ rpath="$rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$finalize_perm_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;;
+ esac
+ fi
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval rpath=\" $hardcode_libdir_flag_spec\"
+ fi
+ finalize_rpath="$rpath"
+
+ if test -n "$libobjs" && test "$build_old_libs" = yes; then
+ # Transform all the library objects into standard objects.
+ compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ fi
+
+ dlsyms=
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ if test -n "$NM" && test -n "$global_symbol_pipe"; then
+ dlsyms="${outputname}S.c"
+ else
+ $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
+ fi
+ fi
+
+ if test -n "$dlsyms"; then
+ case "$dlsyms" in
+ "") ;;
+ *.c)
+ # Discover the nlist of each of the dlfiles.
+ nlist="$output_objdir/${outputname}.nm"
+
+ $show "$rm $nlist ${nlist}S ${nlist}T"
+ $run $rm "$nlist" "${nlist}S" "${nlist}T"
+
+ # Parse the name list into a source file.
+ $show "creating $output_objdir/$dlsyms"
+
+ test -z "$run" && $echo > "$output_objdir/$dlsyms" "\
+/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */
+/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */
+
+#ifdef __cplusplus
+extern \"C\" {
+#endif
+
+/* Prevent the only kind of declaration conflicts we can make. */
+#define lt_preloaded_symbols some_other_symbol
+
+/* External symbol declarations for the compiler. */\
+"
+
+ if test "$dlself" = yes; then
+ $show "generating symbol list for \`$output'"
+
+ test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist"
+
+ # Add our own program objects to the symbol list.
+ progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ for arg in $progfiles; do
+ $show "extracting global C symbols from \`$arg'"
+ $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+ done
+
+ if test -n "$exclude_expsyms"; then
+ $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+ $run eval '$mv "$nlist"T "$nlist"'
+ fi
+
+ if test -n "$export_symbols_regex"; then
+ $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+ $run eval '$mv "$nlist"T "$nlist"'
+ fi
+
+ # Prepare the list of exported symbols
+ if test -z "$export_symbols"; then
+ export_symbols="$output_objdir/$output.exp"
+ $run $rm $export_symbols
+ $run eval "sed -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+ else
+ $run eval "sed -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"'
+ $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T'
+ $run eval 'mv "$nlist"T "$nlist"'
+ fi
+ fi
+
+ for arg in $dlprefiles; do
+ $show "extracting global C symbols from \`$arg'"
+ name=`echo "$arg" | sed -e 's%^.*/%%'`
+ $run eval 'echo ": $name " >> "$nlist"'
+ $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+ done
+
+ if test -z "$run"; then
+ # Make sure we have at least an empty file.
+ test -f "$nlist" || : > "$nlist"
+
+ if test -n "$exclude_expsyms"; then
+ egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+ $mv "$nlist"T "$nlist"
+ fi
+
+ # Try sorting and uniquifying the output.
+ if grep -v "^: " < "$nlist" | sort +2 | uniq > "$nlist"S; then
+ :
+ else
+ grep -v "^: " < "$nlist" > "$nlist"S
+ fi
+
+ if test -f "$nlist"S; then
+ eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"'
+ else
+ echo '/* NONE */' >> "$output_objdir/$dlsyms"
+ fi
+
+ $echo >> "$output_objdir/$dlsyms" "\
+
+#undef lt_preloaded_symbols
+
+#if defined (__STDC__) && __STDC__
+# define lt_ptr_t void *
+#else
+# define lt_ptr_t char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+ const char *name;
+ lt_ptr_t address;
+}
+lt_preloaded_symbols[] =
+{\
+"
+
+ sed -n -e 's/^: \([^ ]*\) $/ {\"\1\", (lt_ptr_t) 0},/p' \
+ -e 's/^. \([^ ]*\) \([^ ]*\)$/ {"\2", (lt_ptr_t) \&\2},/p' \
+ < "$nlist" >> "$output_objdir/$dlsyms"
+
+ $echo >> "$output_objdir/$dlsyms" "\
+ {0, (lt_ptr_t) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+ return lt_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif\
+"
+ fi
+
+ pic_flag_for_symtable=
+ case "$host" in
+ # compiling the symbol table file with pic_flag works around
+ # a FreeBSD bug that causes programs to crash when -lm is
+ # linked before any other PIC object. But we must not use
+ # pic_flag when linking with -static. The problem exists in
+ # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+ *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+ case "$compile_command " in
+ *" -static "*) ;;
+ *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";;
+ esac;;
+ *-*-hpux*)
+ case "$compile_command " in
+ *" -static "*) ;;
+ *) pic_flag_for_symtable=" $pic_flag -DPIC";;
+ esac
+ esac
+
+ # Now compile the dynamic symbol file.
+ $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")"
+ $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $?
+
+ # Clean up the generated files.
+ $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T"
+ $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T"
+
+ # Transform the symbol file into the correct name.
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ ;;
+ *)
+ $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
+ exit 1
+ ;;
+ esac
+ else
+ # We keep going just in case the user didn't refer to
+ # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
+ # really was required.
+
+ # Nullify the symbol file.
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
+ fi
+
+ if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then
+ # Replace the output file specification.
+ compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+ link_command="$compile_command$compile_rpath"
+
+ # We have no uninstalled library dependencies, so finalize right now.
+ $show "$link_command"
+ $run eval "$link_command"
+ status=$?
+
+ # Delete the generated files.
+ if test -n "$dlsyms"; then
+ $show "$rm $output_objdir/${outputname}S.${objext}"
+ $run $rm "$output_objdir/${outputname}S.${objext}"
+ fi
+
+ exit $status
+ fi
+
+ if test -n "$shlibpath_var"; then
+ # We should set the shlibpath_var
+ rpath=
+ for dir in $temp_rpath; do
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*)
+ # Absolute path.
+ rpath="$rpath$dir:"
+ ;;
+ *)
+ # Relative path: add a thisdir entry.
+ rpath="$rpath\$thisdir/$dir:"
+ ;;
+ esac
+ done
+ temp_rpath="$rpath"
+ fi
+
+ if test -n "$compile_shlibpath$finalize_shlibpath"; then
+ compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
+ fi
+ if test -n "$finalize_shlibpath"; then
+ finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+ fi
+
+ compile_var=
+ finalize_var=
+ if test -n "$runpath_var"; then
+ if test -n "$perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
+ fi
+ if test -n "$finalize_perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $finalize_perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
+ fi
+ fi
+
+ if test "$no_install" = yes; then
+ # We don't need to create a wrapper script.
+ link_command="$compile_var$compile_command$compile_rpath"
+ # Replace the output file specification.
+ link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+ # Delete the old output file.
+ $run $rm $output
+ # Link the executable and exit
+ $show "$link_command"
+ $run eval "$link_command" || exit $?
+ exit 0
+ fi
+
+ if test "$hardcode_action" = relink || test "$hardcode_into_libs" = all; then
+ # Fast installation is not supported
+ link_command="$compile_var$compile_command$compile_rpath"
+ relink_command="$finalize_var$finalize_command$finalize_rpath"
+
+ $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2
+ $echo "$modename: \`$output' will be relinked during installation" 1>&2
+ else
+ if test "$fast_install" != no; then
+ link_command="$finalize_var$compile_command$finalize_rpath"
+ if test "$fast_install" = yes; then
+ relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'`
+ else
+ # fast_install is set to needless
+ relink_command=
+ fi
+ else
+ link_command="$compile_var$compile_command$compile_rpath"
+ relink_command="$finalize_var$finalize_command$finalize_rpath"
+ fi
+ fi
+
+ # Replace the output file specification.
+ link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
+
+ # Delete the old output files.
+ $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname
+
+ $show "$link_command"
+ $run eval "$link_command" || exit $?
+
+ # Now create the wrapper script.
+ $show "creating $output"
+
+ # Quote the relink command for shipping.
+ if test -n "$relink_command"; then
+ relink_command="cd `pwd`; $relink_command"
+ relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+ fi
+
+ # Quote $echo for shipping.
+ if test "X$echo" = "X$SHELL $0 --fallback-echo"; then
+ case "$0" in
+ [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";;
+ *) qecho="$SHELL `pwd`/$0 --fallback-echo";;
+ esac
+ qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"`
+ else
+ qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
+ fi
+
+ # Only actually do things if our run command is non-null.
+ if test -z "$run"; then
+ # win32 will think the script is a binary if it has
+ # a .exe suffix, so we strip it off here.
+ case $output in
+ *.exe) output=`echo $output|sed 's,.exe$,,'` ;;
+ esac
+ $rm $output
+ trap "$rm $output; exit 1" 1 2 15
+
+ $echo > $output "\
+#! $SHELL
+
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# The $output program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e 1s/^X//'
+sed_quote_subst='$sed_quote_subst'
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi
+
+relink_command=\"$relink_command\"
+
+# This environment variable determines our operation mode.
+if test \"\$libtool_install_magic\" = \"$magic\"; then
+ # install mode needs the following variable:
+ link_against_libtool_libs='$link_against_libtool_libs'
+else
+ # When we are sourced in execute mode, \$file and \$echo are already set.
+ if test \"\$libtool_execute_magic\" != \"$magic\"; then
+ echo=\"$qecho\"
+ file=\"\$0\"
+ # Make sure echo works.
+ if test \"X\$1\" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+ elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then
+ # Yippee, \$echo works!
+ :
+ else
+ # Restart under the correct shell, and then maybe \$echo will work.
+ exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"}
+ fi
+ fi\
+"
+ $echo >> $output "\
+
+ # Find the directory that this script lives in.
+ thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\`
+ test \"x\$thisdir\" = \"x\$file\" && thisdir=.
+
+ # Follow symbolic links until we get to the real thisdir.
+ file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\`
+ while test -n \"\$file\"; do
+ destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\`
+
+ # If there was a directory component, then change thisdir.
+ if test \"x\$destdir\" != \"x\$file\"; then
+ case \"\$destdir\" in
+ [\\/]* | [A-Za-z]:[\\/]*) thisdir=\"\$destdir\" ;;
+ *) thisdir=\"\$thisdir/\$destdir\" ;;
+ esac
+ fi
+
+ file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\`
+ file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\`
+ done
+
+ # Try to get the absolute directory name.
+ absdir=\`cd \"\$thisdir\" && pwd\`
+ test -n \"\$absdir\" && thisdir=\"\$absdir\"
+"
+
+ if test "$fast_install" = yes; then
+ echo >> $output "\
+ program=lt-'$outputname'
+ progdir=\"\$thisdir/$objdir\"
+
+ if test ! -f \"\$progdir/\$program\" || \\
+ { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | sed 1q\`; \\
+ test \"X\$file\" != \"X\$progdir/\$program\"; }; then
+
+ file=\"\$\$-\$program\"
+
+ if test ! -d \"\$progdir\"; then
+ $mkdir \"\$progdir\"
+ else
+ $rm \"\$progdir/\$file\"
+ fi"
+
+ echo >> $output "\
+
+ # relink executable if necessary
+ if test -n \"\$relink_command\"; then
+ if (eval \$relink_command); then :
+ else
+ $rm \"\$progdir/\$file\"
+ exit 1
+ fi
+ fi
+
+ $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
+ { $rm \"\$progdir/\$program\";
+ $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; }
+ $rm \"\$progdir/\$file\"
+ fi"
+ else
+ echo >> $output "\
+ program='$outputname'
+ progdir=\"\$thisdir/$objdir\"
+"
+ fi
+
+ echo >> $output "\
+
+ if test -f \"\$progdir/\$program\"; then"
+
+ # Export our shlibpath_var if we have one.
+ if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+ $echo >> $output "\
+ # Add our own library path to $shlibpath_var
+ $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
+
+ # Some systems cannot cope with colon-terminated $shlibpath_var
+ # The second colon is a workaround for a bug in BeOS R4 sed
+ $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\`
+
+ export $shlibpath_var
+"
+ fi
+
+ # fixup the dll searchpath if we need to.
+ if test -n "$dllsearchpath"; then
+ $echo >> $output "\
+ # Add the dll search path components to the executable PATH
+ PATH=$dllsearchpath:\$PATH
+"
+ fi
+
+ $echo >> $output "\
+ if test \"\$libtool_execute_magic\" != \"$magic\"; then
+ # Run the actual program with our arguments.
+"
+ case $host in
+ *-*-cygwin* | *-*-mingw | *-*-os2*)
+ # win32 systems need to use the prog path for dll
+ # lookup to work
+ $echo >> $output "\
+ exec \$progdir\\\\\$program \${1+\"\$@\"}
+"
+ ;;
+ *)
+ $echo >> $output "\
+ # Export the path to the program.
+ PATH=\"\$progdir:\$PATH\"
+ export PATH
+
+ exec \$program \${1+\"\$@\"}
+"
+ ;;
+ esac
+ $echo >> $output "\
+ \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\"
+ exit 1
+ fi
+ else
+ # The program doesn't exist.
+ \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2
+ \$echo \"This script is just a wrapper for \$program.\" 1>&2
+ echo \"See the $PACKAGE documentation for more information.\" 1>&2
+ exit 1
+ fi
+fi\
+"
+ chmod +x $output
+ fi
+ exit 0
+ ;;
+ esac
+
+ # See if we need to build an old-fashioned archive.
+ for oldlib in $oldlibs; do
+
+ if test "$build_libtool_libs" = convenience; then
+ oldobjs="$libobjs_save"
+ addlibs="$convenience"
+ build_libtool_libs=no
+ else
+ if test "$build_libtool_libs" = module; then
+ oldobjs="$libobjs_save"
+ build_libtool_libs=no
+ else
+ oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`
+ fi
+ addlibs="$old_convenience"
+ fi
+
+ if test -n "$addlibs"; then
+ gentop="$output_objdir/${outputname}x"
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "mkdir $gentop"
+ $run mkdir "$gentop"
+ status=$?
+ if test $status -ne 0 && test ! -d "$gentop"; then
+ exit $status
+ fi
+ generated="$generated $gentop"
+
+ # Add in members from convenience archives.
+ for xlib in $addlibs; do
+ # Extract the objects.
+ case "$xlib" in
+ [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+ *) xabs=`pwd`"/$xlib" ;;
+ esac
+ xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+ xdir="$gentop/$xlib"
+
+ $show "${rm}r $xdir"
+ $run ${rm}r "$xdir"
+ $show "mkdir $xdir"
+ $run mkdir "$xdir"
+ status=$?
+ if test $status -ne 0 && test ! -d "$xdir"; then
+ exit $status
+ fi
+ $show "(cd $xdir && $AR x $xabs)"
+ $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+ oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP`
+ done
+ fi
+
+ # Do each command in the archive commands.
+ if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
+ eval cmds=\"$old_archive_from_new_cmds\"
+ else
+ # Ensure that we have .o objects in place in case we decided
+ # not to build a shared library, and have fallen back to building
+ # static libs even though --disable-static was passed!
+ for oldobj in $oldobjs; do
+ if test ! -f $oldobj; then
+ xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$oldobj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'`
+ obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"`
+ $show "(cd $xdir && ${LN_S} $obj $baseobj)"
+ $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $?
+ fi
+ done
+
+ eval cmds=\"$old_archive_cmds\"
+ fi
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ done
+
+ if test -n "$generated"; then
+ $show "${rm}r$generated"
+ $run ${rm}r$generated
+ fi
+
+ # Now create the libtool archive.
+ case "$output" in
+ *.la)
+ old_library=
+ test "$build_old_libs" = yes && old_library="$libname.$libext"
+ $show "creating $output"
+
+ # Quote the link command for shipping.
+ relink_command="cd `pwd`; $SHELL $0 --mode=relink $libtool_args"
+ relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+
+ # Only create the output if not a dry run.
+ if test -z "$run"; then
+ for installed in no yes; do
+ if test "$installed" = yes; then
+ if test -z "$install_libdir"; then
+ break
+ fi
+ output="$output_objdir/$outputname"i
+ # Replace all uninstalled libtool libraries with the installed ones
+ newdependency_libs=
+ for deplib in $dependency_libs; do
+ case "$deplib" in
+ *.la)
+ name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ newdependency_libs="$newdependency_libs $libdir/$name"
+ ;;
+ *) newdependency_libs="$newdependency_libs $deplib" ;;
+ esac
+ done
+ dependency_libs="$newdependency_libs"
+ newdlfiles=
+ for lib in $dlfiles; do
+ name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ newdlfiles="$newdlfiles $libdir/$name"
+ done
+ dlfiles="$newdlfiles"
+ newdlprefiles=
+ for lib in $dlprefiles; do
+ name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ newdlprefiles="$newdlprefiles $libdir/$name"
+ done
+ dlprefiles="$newdlprefiles"
+ fi
+ $rm $output
+ $echo > $output "\
+# $outputname - a libtool library file
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# The name that we can dlopen(3).
+dlname='$dlname'
+
+# Names of this library.
+library_names='$library_names'
+
+# The name of the static archive.
+old_library='$old_library'
+
+# Libraries that this one depends upon.
+dependency_libs='$dependency_libs'
+
+# Version information for $libname.
+current=$current
+age=$age
+revision=$revision
+
+# Is this an already installed library?
+installed=$installed
+
+# Files to dlopen/dlpreopen
+dlopen='$dlfiles'
+dlpreopen='$dlprefiles'
+
+# Directory that this library needs to be installed in:
+libdir='$install_libdir'"
+ if test "$installed" = no; then
+ $echo >> $output "\
+relink_command=\"$relink_command\""
+ fi
+ done
+ fi
+
+ # Do a symbolic link so that the libtool archive can be found in
+ # LD_LIBRARY_PATH before the program is installed.
+ $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)"
+ $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $?
+ ;;
+ esac
+ exit 0
+ ;;
+
+ # libtool install mode
+ install)
+ modename="$modename: install"
+
+ # There may be an optional sh(1) argument at the beginning of
+ # install_prog (especially on Windows NT).
+ if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh; then
+ # Aesthetically quote it.
+ arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*)
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$arg "
+ arg="$1"
+ shift
+ else
+ install_prog=
+ arg="$nonopt"
+ fi
+
+ # The real first argument should be the name of the installation program.
+ # Aesthetically quote it.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*)
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$install_prog$arg"
+
+ # We need to accept at least all the BSD install flags.
+ dest=
+ files=
+ opts=
+ prev=
+ install_type=
+ isdir=no
+ stripme=
+ for arg
+ do
+ if test -n "$dest"; then
+ files="$files $dest"
+ dest="$arg"
+ continue
+ fi
+
+ case "$arg" in
+ -d) isdir=yes ;;
+ -f) prev="-f" ;;
+ -g) prev="-g" ;;
+ -m) prev="-m" ;;
+ -o) prev="-o" ;;
+ -s)
+ stripme=" -s"
+ continue
+ ;;
+ -*) ;;
+
+ *)
+ # If the previous option needed an argument, then skip it.
+ if test -n "$prev"; then
+ prev=
+ else
+ dest="$arg"
+ continue
+ fi
+ ;;
+ esac
+
+ # Aesthetically quote the argument.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*)
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$install_prog $arg"
+ done
+
+ if test -z "$install_prog"; then
+ $echo "$modename: you must specify an install program" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ if test -n "$prev"; then
+ $echo "$modename: the \`$prev' option requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ if test -z "$files"; then
+ if test -z "$dest"; then
+ $echo "$modename: no file or destination specified" 1>&2
+ else
+ $echo "$modename: you must specify a destination" 1>&2
+ fi
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Strip any trailing slash from the destination.
+ dest=`$echo "X$dest" | $Xsed -e 's%/$%%'`
+
+ # Check to see that the destination is a directory.
+ test -d "$dest" && isdir=yes
+ if test "$isdir" = yes; then
+ destdir="$dest"
+ destname=
+ else
+ destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$destdir" = "X$dest" && destdir=.
+ destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'`
+
+ # Not a directory, so check to see that there is only one file specified.
+ set dummy $files
+ if test $# -gt 2; then
+ $echo "$modename: \`$dest' is not a directory" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+ fi
+ case "$destdir" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ for file in $files; do
+ case "$file" in
+ *.lo) ;;
+ *)
+ $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+ esac
+ done
+ ;;
+ esac
+
+ # This variable tells wrapper scripts just to set variables rather
+ # than running their programs.
+ libtool_install_magic="$magic"
+
+ staticlibs=
+ future_libdirs=
+ current_libdirs=
+ for file in $files; do
+
+ # Do each installation.
+ case "$file" in
+ *.$libext)
+ # Do the static libraries later.
+ staticlibs="$staticlibs $file"
+ ;;
+
+ *.la)
+ # Check to see that this really is a libtool archive.
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ library_names=
+ old_library=
+ relink_command=
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Add the libdir to current_libdirs if it is the destination.
+ if test "X$destdir" = "X$libdir"; then
+ case "$current_libdirs " in
+ *" $libdir "*) ;;
+ *) current_libdirs="$current_libdirs $libdir" ;;
+ esac
+ else
+ # Note the libdir as a future libdir.
+ case "$future_libdirs " in
+ *" $libdir "*) ;;
+ *) future_libdirs="$future_libdirs $libdir" ;;
+ esac
+ fi
+
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/
+ test "X$dir" = "X$file/" && dir=
+ dir="$dir$objdir"
+
+ if test "$hardcode_into_libs" = all; then
+ if test -z "$relink_command"; then
+ $echo "$modename: invalid libtool pseudo library \`$file'" 1>&2
+ exit 1
+ fi
+ $echo "$modename: warning: relinking \`$file'" 1>&2
+ $show "$relink_command"
+ if $run eval "$relink_command"; then :
+ else
+ $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+ continue
+ fi
+ fi
+
+ # See the names of the shared library.
+ set dummy $library_names
+ if test -n "$2"; then
+ realname="$2"
+ shift
+ shift
+
+ srcname="$realname"
+ test "$hardcode_into_libs" = all && srcname="$realname"T
+
+ # Install the shared library and build the symlinks.
+ $show "$install_prog $dir/$srcname $destdir/$realname"
+ $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $?
+ if test -n "$stripme" && test -n "$striplib"; then
+ $show "$striplib $destdir/$realname"
+ $run eval "$striplib $destdir/$realname" || exit $?
+ fi
+
+ if test $# -gt 0; then
+ # Delete the old symlinks, and create new ones.
+ for linkname
+ do
+ if test "$linkname" != "$realname"; then
+ $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)"
+ $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)"
+ fi
+ done
+ fi
+
+ # Do each command in the postinstall commands.
+ lib="$destdir/$realname"
+ eval cmds=\"$postinstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ # Install the pseudo-library for information purposes.
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ instname="$dir/$name"i
+ $show "$install_prog $instname $destdir/$name"
+ $run eval "$install_prog $instname $destdir/$name" || exit $?
+
+ # Maybe install the static library, too.
+ test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
+ ;;
+
+ *.lo)
+ # Install (i.e. copy) a libtool object.
+
+ # Figure out destination file name, if it wasn't already specified.
+ if test -n "$destname"; then
+ destfile="$destdir/$destname"
+ else
+ destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ destfile="$destdir/$destfile"
+ fi
+
+ # Deduce the name of the destination old-style object file.
+ case "$destfile" in
+ *.lo)
+ staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"`
+ ;;
+ *.$objext)
+ staticdest="$destfile"
+ destfile=
+ ;;
+ *)
+ $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+ esac
+
+ # Install the libtool object if requested.
+ if test -n "$destfile"; then
+ $show "$install_prog $file $destfile"
+ $run eval "$install_prog $file $destfile" || exit $?
+ fi
+
+ # Install the old object if enabled.
+ if test "$build_old_libs" = yes; then
+ # Deduce the name of the old-style object file.
+ staticobj=`$echo "X$file" | $Xsed -e "$lo2o"`
+
+ $show "$install_prog $staticobj $staticdest"
+ $run eval "$install_prog \$staticobj \$staticdest" || exit $?
+ fi
+ exit 0
+ ;;
+
+ *)
+ # Figure out destination file name, if it wasn't already specified.
+ if test -n "$destname"; then
+ destfile="$destdir/$destname"
+ else
+ destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ destfile="$destdir/$destfile"
+ fi
+
+ # Do a test to see if this is really a libtool program.
+ if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ link_against_libtool_libs=
+ relink_command=
+
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Check the variables that should have been set.
+ if test -z "$link_against_libtool_libs"; then
+ $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2
+ exit 1
+ fi
+
+ finalize=yes
+ for lib in $link_against_libtool_libs; do
+ # Check to see that each library is installed.
+ libdir=
+ if test -f "$lib"; then
+ # If there is no directory component, then add one.
+ case "$lib" in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+ fi
+ libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test
+ if test -n "$libdir" && test ! -f "$libfile"; then
+ $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2
+ finalize=no
+ fi
+ done
+
+ relink_command=
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ outputname=
+ if test "$fast_install" = no && test -n "$relink_command"; then
+ if test "$finalize" = yes && test -z "$run"; then
+ tmpdir="/tmp"
+ test -n "$TMPDIR" && tmpdir="$TMPDIR"
+ tmpdir="$tmpdir/libtool-$$"
+ if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then :
+ else
+ $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2
+ continue
+ fi
+ outputname="$tmpdir/$file"
+ # Replace the output file specification.
+ relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'`
+
+ $show "$relink_command"
+ if $run eval "$relink_command"; then :
+ else
+ $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+ ${rm}r "$tmpdir"
+ continue
+ fi
+ file="$outputname"
+ else
+ $echo "$modename: warning: cannot relink \`$file'" 1>&2
+ fi
+ else
+ # Install the binary that we compiled earlier.
+ file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"`
+ fi
+ fi
+
+ $show "$install_prog$stripme $file $destfile"
+ $run eval "$install_prog\$stripme \$file \$destfile" || exit $?
+ test -n "$outputname" && ${rm}r "$tmpdir"
+ ;;
+ esac
+ done
+
+ for file in $staticlibs; do
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+
+ # Set up the ranlib parameters.
+ oldlib="$destdir/$name"
+
+ $show "$install_prog $file $oldlib"
+ $run eval "$install_prog \$file \$oldlib" || exit $?
+
+ if test -n "$stripme" && test -n "$striplib"; then
+ $show "$old_striplib $oldlib"
+ $run eval "$old_striplib $oldlib" || exit $?
+ fi
+
+ # Do each command in the postinstall commands.
+ eval cmds=\"$old_postinstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ done
+
+ if test -n "$future_libdirs"; then
+ $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2
+ fi
+
+ if test -n "$current_libdirs"; then
+ # Maybe just do a dry run.
+ test -n "$run" && current_libdirs=" -n$current_libdirs"
+ exec $SHELL $0 --finish$current_libdirs
+ exit 1
+ fi
+
+ exit 0
+ ;;
+
+ # libtool finish mode
+ finish)
+ modename="$modename: finish"
+ libdirs="$nonopt"
+ admincmds=
+
+ if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+ for dir
+ do
+ libdirs="$libdirs $dir"
+ done
+
+ for libdir in $libdirs; do
+ if test -n "$finish_cmds"; then
+ # Do each command in the finish commands.
+ eval cmds=\"$finish_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || admincmds="$admincmds
+ $cmd"
+ done
+ IFS="$save_ifs"
+ fi
+ if test -n "$finish_eval"; then
+ # Do the single finish_eval.
+ eval cmds=\"$finish_eval\"
+ $run eval "$cmds" || admincmds="$admincmds
+ $cmds"
+ fi
+ done
+ fi
+
+ # Exit here if they wanted silent mode.
+ test "$show" = : && exit 0
+
+ echo "----------------------------------------------------------------------"
+ echo "Libraries have been installed in:"
+ for libdir in $libdirs; do
+ echo " $libdir"
+ done
+ echo
+ echo "If you ever happen to want to link against installed libraries"
+ echo "in a given directory, LIBDIR, you must either use libtool, and"
+ echo "specify the full pathname of the library, or use \`-LLIBDIR'"
+ echo "flag during linking and do at least one of the following:"
+ if test -n "$shlibpath_var"; then
+ echo " - add LIBDIR to the \`$shlibpath_var' environment variable"
+ echo " during execution"
+ fi
+ if test -n "$runpath_var"; then
+ echo " - add LIBDIR to the \`$runpath_var' environment variable"
+ echo " during linking"
+ fi
+ if test -n "$hardcode_libdir_flag_spec"; then
+ libdir=LIBDIR
+ eval flag=\"$hardcode_libdir_flag_spec\"
+
+ echo " - use the \`$flag' linker flag"
+ fi
+ if test -n "$admincmds"; then
+ echo " - have your system administrator run these commands:$admincmds"
+ fi
+ if test -f /etc/ld.so.conf; then
+ echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
+ fi
+ echo
+ echo "See any operating system documentation about shared libraries for"
+ echo "more information, such as the ld(1) and ld.so(8) manual pages."
+ echo "----------------------------------------------------------------------"
+ exit 0
+ ;;
+
+ # libtool execute mode
+ execute)
+ modename="$modename: execute"
+
+ # The first argument is the command name.
+ cmd="$nonopt"
+ if test -z "$cmd"; then
+ $echo "$modename: you must specify a COMMAND" 1>&2
+ $echo "$help"
+ exit 1
+ fi
+
+ # Handle -dlopen flags immediately.
+ for file in $execute_dlfiles; do
+ if test ! -f "$file"; then
+ $echo "$modename: \`$file' is not a file" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ dir=
+ case "$file" in
+ *.la)
+ # Check to see that this really is a libtool archive.
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Read the libtool library.
+ dlname=
+ library_names=
+
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Skip this library if it cannot be dlopened.
+ if test -z "$dlname"; then
+ # Warn if it was a shared library.
+ test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'"
+ continue
+ fi
+
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$file" && dir=.
+
+ if test -f "$dir/$objdir/$dlname"; then
+ dir="$dir/$objdir"
+ else
+ $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2
+ exit 1
+ fi
+ ;;
+
+ *.lo)
+ # Just add the directory containing the .lo file.
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$file" && dir=.
+ ;;
+
+ *)
+ $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2
+ continue
+ ;;
+ esac
+
+ # Get the absolute pathname.
+ absdir=`cd "$dir" && pwd`
+ test -n "$absdir" && dir="$absdir"
+
+ # Now add the directory to shlibpath_var.
+ if eval "test -z \"\$$shlibpath_var\""; then
+ eval "$shlibpath_var=\"\$dir\""
+ else
+ eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
+ fi
+ done
+
+ # This variable tells wrapper scripts just to set shlibpath_var
+ # rather than running their programs.
+ libtool_execute_magic="$magic"
+
+ # Check if any of the arguments is a wrapper script.
+ args=
+ for file
+ do
+ case "$file" in
+ -*) ;;
+ *)
+ # Do a test to see if this is really a libtool program.
+ if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Transform arg to wrapped name.
+ file="$progdir/$program"
+ fi
+ ;;
+ esac
+ # Quote arguments (to preserve shell metacharacters).
+ file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"`
+ args="$args \"$file\""
+ done
+
+ if test -z "$run"; then
+ if test -n "$shlibpath_var"; then
+ # Export the shlibpath_var.
+ eval "export $shlibpath_var"
+ fi
+
+ # Restore saved enviroment variables
+ if test "${save_LC_ALL+set}" = set; then
+ LC_ALL="$save_LC_ALL"; export LC_ALL
+ fi
+ if test "${save_LANG+set}" = set; then
+ LANG="$save_LANG"; export LANG
+ fi
+
+ # Now actually exec the command.
+ eval "exec \$cmd$args"
+
+ $echo "$modename: cannot exec \$cmd$args"
+ exit 1
+ else
+ # Display what would be done.
+ if test -n "$shlibpath_var"; then
+ eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\""
+ $echo "export $shlibpath_var"
+ fi
+ $echo "$cmd$args"
+ exit 0
+ fi
+ ;;
+
+ # libtool clean and uninstall mode
+ clean | uninstall)
+ modename="$modename: $mode"
+ rm="$nonopt"
+ files=
+
+ # This variable tells wrapper scripts just to set variables rather
+ # than running their programs.
+ libtool_install_magic="$magic"
+
+ for arg
+ do
+ case "$arg" in
+ -*) rm="$rm $arg" ;;
+ *) files="$files $arg" ;;
+ esac
+ done
+
+ if test -z "$rm"; then
+ $echo "$modename: you must specify an RM program" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ for file in $files; do
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$dir" = "X$file"; then
+ dir=.
+ objdir="$objdir"
+ else
+ objdir="$dir/$objdir"
+ fi
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ test $mode = uninstall && objdir="$dir"
+
+ rmfiles="$file"
+
+ case "$name" in
+ *.la)
+ # Possibly a libtool archive, so verify it.
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ . $dir/$name
+
+ # Delete the libtool libraries and symlinks.
+ for n in $library_names; do
+ rmfiles="$rmfiles $objdir/$n"
+ done
+ test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library"
+ test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i"
+
+ if test $mode = uninstall; then
+ if test -n "$library_names"; then
+ # Do each command in the postuninstall commands.
+ eval cmds=\"$postuninstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd"
+ done
+ IFS="$save_ifs"
+ fi
+
+ if test -n "$old_library"; then
+ # Do each command in the old_postuninstall commands.
+ eval cmds=\"$old_postuninstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd"
+ done
+ IFS="$save_ifs"
+ fi
+ # FIXME: should reinstall the best remaining shared library.
+ fi
+ fi
+ ;;
+
+ *.lo)
+ if test "$build_old_libs" = yes; then
+ oldobj=`$echo "X$name" | $Xsed -e "$lo2o"`
+ rmfiles="$rmfiles $dir/$oldobj"
+ fi
+ ;;
+
+ *)
+ # Do a test to see if this is a libtool program.
+ if test $mode = clean &&
+ (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ relink_command=
+ . $dir/$file
+
+ rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}"
+ if test "$fast_install" = yes && test -n "$relink_command"; then
+ rmfiles="$rmfiles $objdir/lt-$name"
+ fi
+ fi
+ ;;
+ esac
+ $show "$rm $rmfiles"
+ $run $rm $rmfiles
+ done
+ exit 0
+ ;;
+
+ "")
+ $echo "$modename: you must specify a MODE" 1>&2
+ $echo "$generic_help" 1>&2
+ exit 1
+ ;;
+ esac
+
+ $echo "$modename: invalid operation mode \`$mode'" 1>&2
+ $echo "$generic_help" 1>&2
+ exit 1
+fi # test -z "$show_help"
+
+# We need to display help for each of the modes.
+case "$mode" in
+"") $echo \
+"Usage: $modename [OPTION]... [MODE-ARG]...
+
+Provide generalized library-building support services.
+
+ --config show all configuration variables
+ --debug enable verbose shell tracing
+-n, --dry-run display commands without modifying any files
+ --features display basic configuration information and exit
+ --finish same as \`--mode=finish'
+ --help display this help message and exit
+ --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS]
+ --quiet same as \`--silent'
+ --silent don't print informational messages
+ --version print version information
+
+MODE must be one of the following:
+
+ clean remove files from the build directory
+ compile compile a source file into a libtool object
+ execute automatically set library path, then run a program
+ finish complete the installation of libtool libraries
+ install install libraries or executables
+ link create a library or an executable
+ uninstall remove libraries from an installed directory
+
+MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for
+a more detailed description of MODE."
+ exit 0
+ ;;
+
+clean)
+ $echo \
+"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+ ;;
+
+compile)
+ $echo \
+"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
+
+Compile a source file into a libtool library object.
+
+This mode accepts the following additional options:
+
+ -o OUTPUT-FILE set the output file name to OUTPUT-FILE
+ -static always build a \`.o' file suitable for static linking
+
+COMPILE-COMMAND is a command to be used in creating a \`standard' object file
+from the given SOURCEFILE.
+
+The output file name is determined by removing the directory component from
+SOURCEFILE, then substituting the C source code suffix \`.c' with the
+library object suffix, \`.lo'."
+ ;;
+
+execute)
+ $echo \
+"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]...
+
+Automatically set library path, then run a program.
+
+This mode accepts the following additional options:
+
+ -dlopen FILE add the directory containing FILE to the library path
+
+This mode sets the library path environment variable according to \`-dlopen'
+flags.
+
+If any of the ARGS are libtool executable wrappers, then they are translated
+into their corresponding uninstalled binary, and any of their required library
+directories are added to the library path.
+
+Then, COMMAND is executed, with ARGS as arguments."
+ ;;
+
+finish)
+ $echo \
+"Usage: $modename [OPTION]... --mode=finish [LIBDIR]...
+
+Complete the installation of libtool libraries.
+
+Each LIBDIR is a directory that contains libtool libraries.
+
+The commands that this mode executes may require superuser privileges. Use
+the \`--dry-run' option if you just want to see what would be executed."
+ ;;
+
+install)
+ $echo \
+"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND...
+
+Install executables or libraries.
+
+INSTALL-COMMAND is the installation command. The first component should be
+either the \`install' or \`cp' program.
+
+The rest of the components are interpreted as arguments to that command (only
+BSD-compatible install options are recognized)."
+ ;;
+
+link)
+ $echo \
+"Usage: $modename [OPTION]... --mode=link LINK-COMMAND...
+
+Link object files or libraries together to form another library, or to
+create an executable program.
+
+LINK-COMMAND is a command using the C compiler that you would use to create
+a program from several object files.
+
+The following components of LINK-COMMAND are treated specially:
+
+ -all-static do not do any dynamic linking at all
+ -avoid-version do not add a version suffix if possible
+ -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime
+ -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols
+ -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+ -export-symbols SYMFILE
+ try to export only the symbols listed in SYMFILE
+ -export-symbols-regex REGEX
+ try to export only the symbols matching REGEX
+ -LLIBDIR search LIBDIR for required installed libraries
+ -lNAME OUTPUT-FILE requires the installed library libNAME
+ -module build a library that can dlopened
+ -no-fast-install disable the fast-install mode
+ -no-install link a not-installable executable
+ -no-undefined declare that a library does not refer to external symbols
+ -o OUTPUT-FILE create OUTPUT-FILE from the specified objects
+ -release RELEASE specify package release information
+ -rpath LIBDIR the created library will eventually be installed in LIBDIR
+ -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries
+ -static do not do any dynamic linking of libtool libraries
+ -version-info CURRENT[:REVISION[:AGE]]
+ specify library version info [each variable defaults to 0]
+
+All other options (arguments beginning with \`-') are ignored.
+
+Every other argument is treated as a filename. Files ending in \`.la' are
+treated as uninstalled libtool libraries, other files are standard or library
+object files.
+
+If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
+only library objects (\`.lo' files) may be specified, and \`-rpath' is
+required, except when creating a convenience library.
+
+If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
+using \`ar' and \`ranlib', or on Windows using \`lib'.
+
+If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
+is created, otherwise an executable program is created."
+ ;;
+
+uninstall)
+ $echo \
+"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
+
+Remove libraries from an installation directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, all the files associated with it are deleted.
+Otherwise, only FILE itself is deleted using RM."
+ ;;
+
+*)
+ $echo "$modename: invalid operation mode \`$mode'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+esac
+
+echo
+$echo "Try \`$modename --help' for more information about other modes."
+
+exit 0
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
diff --git a/rts/gmp/mdate-sh b/rts/gmp/mdate-sh
new file mode 100644
index 0000000000..37171f21fb
--- /dev/null
+++ b/rts/gmp/mdate-sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+# Get modification time of a file or directory and pretty-print it.
+# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+# written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, June 1995
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Prevent date giving response in another language.
+LANG=C
+export LANG
+LC_ALL=C
+export LC_ALL
+LC_TIME=C
+export LC_TIME
+
+# Get the extended ls output of the file or directory.
+# On HPUX /bin/sh, "set" interprets "-rw-r--r--" as options, so the "x" below.
+if ls -L /dev/null 1>/dev/null 2>&1; then
+ set - x`ls -L -l -d $1`
+else
+ set - x`ls -l -d $1`
+fi
+# The month is at least the fourth argument
+# (3 shifts here, the next inside the loop).
+shift
+shift
+shift
+
+# Find the month. Next argument is day, followed by the year or time.
+month=
+until test $month
+do
+ shift
+ case $1 in
+ Jan) month=January; nummonth=1;;
+ Feb) month=February; nummonth=2;;
+ Mar) month=March; nummonth=3;;
+ Apr) month=April; nummonth=4;;
+ May) month=May; nummonth=5;;
+ Jun) month=June; nummonth=6;;
+ Jul) month=July; nummonth=7;;
+ Aug) month=August; nummonth=8;;
+ Sep) month=September; nummonth=9;;
+ Oct) month=October; nummonth=10;;
+ Nov) month=November; nummonth=11;;
+ Dec) month=December; nummonth=12;;
+ esac
+done
+
+day=$2
+
+# Here we have to deal with the problem that the ls output gives either
+# the time of day or the year.
+case $3 in
+ *:*) set `date`; eval year=\$$#
+ case $2 in
+ Jan) nummonthtod=1;;
+ Feb) nummonthtod=2;;
+ Mar) nummonthtod=3;;
+ Apr) nummonthtod=4;;
+ May) nummonthtod=5;;
+ Jun) nummonthtod=6;;
+ Jul) nummonthtod=7;;
+ Aug) nummonthtod=8;;
+ Sep) nummonthtod=9;;
+ Oct) nummonthtod=10;;
+ Nov) nummonthtod=11;;
+ Dec) nummonthtod=12;;
+ esac
+ # For the first six month of the year the time notation can also
+ # be used for files modified in the last year.
+ if (expr $nummonth \> $nummonthtod) > /dev/null;
+ then
+ year=`expr $year - 1`
+ fi;;
+ *) year=$3;;
+esac
+
+# The result.
+echo $day $month $year
diff --git a/rts/gmp/memory.c b/rts/gmp/memory.c
new file mode 100644
index 0000000000..9df440ce22
--- /dev/null
+++ b/rts/gmp/memory.c
@@ -0,0 +1,160 @@
+/* Memory allocation routines.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <stdlib.h> /* for malloc, realloc, free */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef __NeXT__
+#define static
+#endif
+
+
+void * (*_mp_allocate_func) _PROTO ((size_t)) = _mp_default_allocate;
+void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t))
+ = _mp_default_reallocate;
+void (*_mp_free_func) _PROTO ((void *, size_t)) = _mp_default_free;
+
+
+/* Default allocation functions. In case of failure to allocate/reallocate
+ an error message is written to stderr and the program aborts. */
+
+void *
+#if __STDC__
+_mp_default_allocate (size_t size)
+#else
+_mp_default_allocate (size)
+ size_t size;
+#endif
+{
+ void *ret;
+#ifdef DEBUG
+ size_t req_size = size;
+ size += 2 * BYTES_PER_MP_LIMB;
+#endif
+ ret = malloc (size);
+ if (ret == 0)
+ {
+ perror ("cannot allocate in gmp");
+ abort ();
+ }
+
+#ifdef DEBUG
+ {
+ mp_ptr p = ret;
+ p++;
+ p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;
+ if (req_size % BYTES_PER_MP_LIMB == 0)
+ p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);
+ ret = p;
+ }
+#endif
+ return ret;
+}
+
+void *
+#if __STDC__
+_mp_default_reallocate (void *oldptr, size_t old_size, size_t new_size)
+#else
+_mp_default_reallocate (oldptr, old_size, new_size)
+ void *oldptr;
+ size_t old_size;
+ size_t new_size;
+#endif
+{
+ void *ret;
+
+#ifdef DEBUG
+ size_t req_size = new_size;
+
+ if (old_size != 0)
+ {
+ mp_ptr p = oldptr;
+ if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)
+ {
+ fprintf (stderr, "gmp: (realloc) data clobbered before allocation block\n");
+ abort ();
+ }
+ if (old_size % BYTES_PER_MP_LIMB == 0)
+ if (p[old_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+ {
+ fprintf (stderr, "gmp: (realloc) data clobbered after allocation block\n");
+ abort ();
+ }
+ oldptr = p - 1;
+ }
+
+ new_size += 2 * BYTES_PER_MP_LIMB;
+#endif
+
+ ret = realloc (oldptr, new_size);
+ if (ret == 0)
+ {
+ perror ("cannot allocate in gmp");
+ abort ();
+ }
+
+#ifdef DEBUG
+ {
+ mp_ptr p = ret;
+ p++;
+ p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;
+ if (req_size % BYTES_PER_MP_LIMB == 0)
+ p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);
+ ret = p;
+ }
+#endif
+ return ret;
+}
+
+void
+#if __STDC__
+_mp_default_free (void *blk_ptr, size_t blk_size)
+#else
+_mp_default_free (blk_ptr, blk_size)
+ void *blk_ptr;
+ size_t blk_size;
+#endif
+{
+#ifdef DEBUG
+ {
+ mp_ptr p = blk_ptr;
+ if (blk_size != 0)
+ {
+ if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)
+ {
+ fprintf (stderr, "gmp: (free) data clobbered before allocation block\n");
+ abort ();
+ }
+ if (blk_size % BYTES_PER_MP_LIMB == 0)
+ if (p[blk_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+ {
+ fprintf (stderr, "gmp: (free) data clobbered after allocation block\n");
+ abort ();
+ }
+ }
+ blk_ptr = p - 1;
+ }
+#endif
+ free (blk_ptr);
+}
diff --git a/rts/gmp/missing b/rts/gmp/missing
new file mode 100644
index 0000000000..c60e9d772f
--- /dev/null
+++ b/rts/gmp/missing
@@ -0,0 +1,244 @@
+#! /bin/sh
+# Common stub for a few missing GNU programs while installing.
+# Copyright (C) 1996, 1997, 1999 Free Software Foundation, Inc.
+# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+if test $# -eq 0; then
+ echo 1>&2 "Try \`$0 --help' for more information"
+ exit 1
+fi
+
+run=:
+
+case "$1" in
+--run)
+ # Try to run requested program, and just exit if it succeeds.
+ run=
+ shift
+ "$@" && exit 0
+ ;;
+esac
+
+# If it does not exist, or fails to run (possibly an outdated version),
+# try to emulate it.
+case "$1" in
+
+ -h|--h|--he|--hel|--help)
+ echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
+error status if there is no known handling for PROGRAM.
+
+Options:
+ -h, --help display this help and exit
+ -v, --version output version information and exit
+ --run try to run the given command, and emulate it if it fails
+
+Supported PROGRAM values:
+ aclocal touch file \`aclocal.m4'
+ autoconf touch file \`configure'
+ autoheader touch file \`config.h.in'
+ automake touch all \`Makefile.in' files
+ bison create \`y.tab.[ch]', if possible, from existing .[ch]
+ flex create \`lex.yy.c', if possible, from existing .c
+ lex create \`lex.yy.c', if possible, from existing .c
+ makeinfo touch the output file
+ tar try tar, gnutar, gtar, then tar without non-portable flags
+ yacc create \`y.tab.[ch]', if possible, from existing .[ch]"
+ ;;
+
+ -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+ echo "missing 0.2 - GNU automake"
+ ;;
+
+ -*)
+ echo 1>&2 "$0: Unknown \`$1' option"
+ echo 1>&2 "Try \`$0 --help' for more information"
+ exit 1
+ ;;
+
+ aclocal)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`acinclude.m4' or \`configure.in'. You might want
+ to install the \`Automake' and \`Perl' packages. Grab them from
+ any GNU archive site."
+ touch aclocal.m4
+ ;;
+
+ autoconf)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`configure.in'. You might want to install the
+ \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
+ archive site."
+ touch configure
+ ;;
+
+ autoheader)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`acconfig.h' or \`configure.in'. You might want
+ to install the \`Autoconf' and \`GNU m4' packages. Grab them
+ from any GNU archive site."
+ files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' configure.in`
+ test -z "$files" && files="config.h"
+ touch_files=
+ for f in $files; do
+ case "$f" in
+ *:*) touch_files="$touch_files "`echo "$f" |
+ sed -e 's/^[^:]*://' -e 's/:.*//'`;;
+ *) touch_files="$touch_files $f.in";;
+ esac
+ done
+ touch $touch_files
+ ;;
+
+ automake)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`Makefile.am', \`acinclude.m4' or \`configure.in'.
+ You might want to install the \`Automake' and \`Perl' packages.
+ Grab them from any GNU archive site."
+ find . -type f -name Makefile.am -print |
+ sed 's/\.am$/.in/' |
+ while read f; do touch "$f"; done
+ ;;
+
+ bison|yacc)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified a \`.y' file. You may need the \`Bison' package
+ in order for those modifications to take effect. You can get
+ \`Bison' from any GNU archive site."
+ rm -f y.tab.c y.tab.h
+ if [ $# -ne 1 ]; then
+ eval LASTARG="\${$#}"
+ case "$LASTARG" in
+ *.y)
+ SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
+ if [ -f "$SRCFILE" ]; then
+ cp "$SRCFILE" y.tab.c
+ fi
+ SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
+ if [ -f "$SRCFILE" ]; then
+ cp "$SRCFILE" y.tab.h
+ fi
+ ;;
+ esac
+ fi
+ if [ ! -f y.tab.h ]; then
+ echo >y.tab.h
+ fi
+ if [ ! -f y.tab.c ]; then
+ echo 'main() { return 0; }' >y.tab.c
+ fi
+ ;;
+
+ lex|flex)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified a \`.l' file. You may need the \`Flex' package
+ in order for those modifications to take effect. You can get
+ \`Flex' from any GNU archive site."
+ rm -f lex.yy.c
+ if [ $# -ne 1 ]; then
+ eval LASTARG="\${$#}"
+ case "$LASTARG" in
+ *.l)
+ SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
+ if [ -f "$SRCFILE" ]; then
+ cp "$SRCFILE" lex.yy.c
+ fi
+ ;;
+ esac
+ fi
+ if [ ! -f lex.yy.c ]; then
+ echo 'main() { return 0; }' >lex.yy.c
+ fi
+ ;;
+
+ makeinfo)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified a \`.texi' or \`.texinfo' file, or any other file
+ indirectly affecting the aspect of the manual. The spurious
+ call might also be the consequence of using a buggy \`make' (AIX,
+ DU, IRIX). You might want to install the \`Texinfo' package or
+ the \`GNU make' package. Grab either from any GNU archive site."
+ file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
+ if test -z "$file"; then
+ file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
+ file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file`
+ fi
+ touch $file
+ ;;
+
+ tar)
+ shift
+ if test -n "$run"; then
+ echo 1>&2 "ERROR: \`tar' requires --run"
+ exit 1
+ fi
+
+ # We have already tried tar in the generic part.
+ # Look for gnutar/gtar before invocation to avoid ugly error
+ # messages.
+ if (gnutar --version > /dev/null 2>&1); then
+ gnutar ${1+"$@"} && exit 0
+ fi
+ if (gtar --version > /dev/null 2>&1); then
+ gtar ${1+"$@"} && exit 0
+ fi
+ firstarg="$1"
+ if shift; then
+ case "$firstarg" in
+ *o*)
+ firstarg=`echo "$firstarg" | sed s/o//`
+ tar "$firstarg" ${1+"$@"} && exit 0
+ ;;
+ esac
+ case "$firstarg" in
+ *h*)
+ firstarg=`echo "$firstarg" | sed s/h//`
+ tar "$firstarg" ${1+"$@"} && exit 0
+ ;;
+ esac
+ fi
+
+ echo 1>&2 "\
+WARNING: I can't seem to be able to run \`tar' with the given arguments.
+ You may want to install GNU tar or Free paxutils, or check the
+ command line arguments."
+ exit 1
+ ;;
+
+ *)
+ echo 1>&2 "\
+WARNING: \`$1' is needed, and you do not seem to have it handy on your
+ system. You might have modified some files without having the
+ proper tools for further handling them. Check the \`README' file,
+ it often tells you about the needed prerequirements for installing
+ this package. You may also peek at any GNU archive site, in case
+ some other package would contain this missing \`$1' program."
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/rts/gmp/mkinstalldirs b/rts/gmp/mkinstalldirs
new file mode 100644
index 0000000000..5e17cd39fb
--- /dev/null
+++ b/rts/gmp/mkinstalldirs
@@ -0,0 +1,38 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+# Author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Public domain
+
+errstatus=0
+
+for file
+do
+ set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+ shift
+
+ pathcomp=
+ for d
+ do
+ pathcomp="$pathcomp$d"
+ case "$pathcomp" in
+ -* ) pathcomp=./$pathcomp ;;
+ esac
+
+ if test ! -d "$pathcomp"; then
+ echo "mkdir $pathcomp"
+
+ mkdir "$pathcomp" || lasterr=$?
+
+ if test ! -d "$pathcomp"; then
+ errstatus=$lasterr
+ fi
+ fi
+
+ pathcomp="$pathcomp/"
+ done
+done
+
+exit $errstatus
+
+# mkinstalldirs ends here
diff --git a/rts/gmp/mp.h b/rts/gmp/mp.h
new file mode 100644
index 0000000000..ffab4cba82
--- /dev/null
+++ b/rts/gmp/mp.h
@@ -0,0 +1,124 @@
+/* mp.h -- Definitions for Berkeley compatible multiple precision functions.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifndef __MP_H__
+
+#ifndef __GNU_MP__ /* to allow inclusion of both gmp.h and mp.h */
+#define __GNU_MP__ 3
+#define __need_size_t
+#include <stddef.h>
+#undef __need_size_t
+
+#if defined (__STDC__) || defined (__cplusplus)
+#define __gmp_const const
+#else
+#define __gmp_const
+#endif
+
+#if defined (__GNUC__)
+#define __gmp_inline __inline__
+#else
+#define __gmp_inline
+#endif
+
+#ifndef _EXTERN_INLINE
+#ifdef __GNUC__
+#define _EXTERN_INLINE extern __inline__
+#else
+#define _EXTERN_INLINE static
+#endif
+#endif
+
+#ifdef _SHORT_LIMB
+typedef unsigned int mp_limb_t;
+typedef int mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb_t;
+typedef long long int mp_limb_signed_t;
+#else
+typedef unsigned long int mp_limb_t;
+typedef long int mp_limb_signed_t;
+#endif
+#endif
+
+typedef mp_limb_t * mp_ptr;
+typedef __gmp_const mp_limb_t * mp_srcptr;
+typedef int mp_size_t;
+typedef long int mp_exp_t;
+
+typedef struct
+{
+ int _mp_alloc; /* Number of *limbs* allocated and pointed
+ to by the D field. */
+ int _mp_size; /* abs(SIZE) is the number of limbs
+ the last field points to. If SIZE
+ is negative this is a negative
+ number. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpz_struct;
+#endif /* __GNU_MP__ */
+
+/* User-visible types. */
+typedef __mpz_struct MINT;
+
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+void mp_set_memory_functions _PROTO ((void *(*) (size_t),
+ void *(*) (void *, size_t, size_t),
+ void (*) (void *, size_t)));
+MINT *itom _PROTO ((signed short int));
+MINT *xtom _PROTO ((const char *));
+void move _PROTO ((const MINT *, MINT *));
+void madd _PROTO ((const MINT *, const MINT *, MINT *));
+void msub _PROTO ((const MINT *, const MINT *, MINT *));
+void mult _PROTO ((const MINT *, const MINT *, MINT *));
+void mdiv _PROTO ((const MINT *, const MINT *, MINT *, MINT *));
+void sdiv _PROTO ((const MINT *, signed short int, MINT *, signed short int *));
+void msqrt _PROTO ((const MINT *, MINT *, MINT *));
+void pow _PROTO ((const MINT *, const MINT *, const MINT *, MINT *));
+void rpow _PROTO ((const MINT *, signed short int, MINT *));
+void gcd _PROTO ((const MINT *, const MINT *, MINT *));
+int mcmp _PROTO ((const MINT *, const MINT *));
+void min _PROTO ((MINT *));
+void mout _PROTO ((const MINT *));
+char *mtox _PROTO ((const MINT *));
+void mfree _PROTO ((MINT *));
+
+#if defined (__cplusplus)
+}
+#endif
+
+#define __MP_H__
+#endif /* __MP_H__ */
diff --git a/rts/gmp/mp_bpl.c b/rts/gmp/mp_bpl.c
new file mode 100644
index 0000000000..df8b03e5ab
--- /dev/null
+++ b/rts/gmp/mp_bpl.c
@@ -0,0 +1,27 @@
+/*
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const int mp_bits_per_limb = BITS_PER_MP_LIMB;
+const int __gmp_0 = 0;
+int __gmp_junk;
diff --git a/rts/gmp/mp_clz_tab.c b/rts/gmp/mp_clz_tab.c
new file mode 100644
index 0000000000..1bbd1d6a66
--- /dev/null
+++ b/rts/gmp/mp_clz_tab.c
@@ -0,0 +1,36 @@
+/* __clz_tab -- support for longlong.h
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const
+unsigned char __clz_tab[] =
+{
+ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
diff --git a/rts/gmp/mp_minv_tab.c b/rts/gmp/mp_minv_tab.c
new file mode 100644
index 0000000000..4afff85cfc
--- /dev/null
+++ b/rts/gmp/mp_minv_tab.c
@@ -0,0 +1,50 @@
+/* A table of data supporting modlimb_invert().
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+ INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* modlimb_invert_table[i] is the multiplicative inverse of 2*i+1 mod 256,
+ ie. (modlimb_invert_table[i] * (2*i+1)) % 256 == 1 */
+
+const unsigned char modlimb_invert_table[128] = {
+ 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
+ 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
+ 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
+ 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
+ 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
+ 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
+ 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
+ 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
+ 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
+ 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
+ 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
+ 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
+ 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
+ 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
+ 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
+ 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
+};
diff --git a/rts/gmp/mp_set_fns.c b/rts/gmp/mp_set_fns.c
new file mode 100644
index 0000000000..55d4d9d6e4
--- /dev/null
+++ b/rts/gmp/mp_set_fns.c
@@ -0,0 +1,48 @@
+/* mp_set_memory_functions -- Set the allocate, reallocate, and free functions
+ for use by the mp package.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+ void *(*realloc_func) (void *, size_t, size_t),
+ void (*free_func) (void *, size_t))
+#else
+mp_set_memory_functions (alloc_func, realloc_func, free_func)
+ void *(*alloc_func) ();
+ void *(*realloc_func) ();
+ void (*free_func) ();
+#endif
+{
+ if (alloc_func == 0)
+ alloc_func = _mp_default_allocate;
+ if (realloc_func == 0)
+ realloc_func = _mp_default_reallocate;
+ if (free_func == 0)
+ free_func = _mp_default_free;
+
+ _mp_allocate_func = alloc_func;
+ _mp_reallocate_func = realloc_func;
+ _mp_free_func = free_func;
+}
diff --git a/rts/gmp/mpn/Makefile.am b/rts/gmp/mpn/Makefile.am
new file mode 100644
index 0000000000..1c49ccda25
--- /dev/null
+++ b/rts/gmp/mpn/Makefile.am
@@ -0,0 +1,94 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+SUBDIRS = tests
+
+CPP = @CPP@
+
+# -DOPERATION_$* tells multi-function files which function to produce.
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+GENERIC_SOURCES = mp_bases.c
+OFILES = @mpn_objects@
+
+noinst_LTLIBRARIES = libmpn.la
+libmpn_la_SOURCES = $(GENERIC_SOURCES)
+libmpn_la_LIBADD = $(OFILES)
+libmpn_la_DEPENDENCIES = $(OFILES)
+
+TARG_DIST = a29k alpha arm clipper cray generic hppa i960 lisp m68k m88k \
+ mips2 mips3 ns32k pa64 pa64w power powerpc32 powerpc64 pyr sh sparc32 \
+ sparc64 thumb vax x86 z8000 z8000x
+
+EXTRA_DIST = underscore.h asm-defs.m4 $(TARG_DIST)
+
+# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP?
+COMPILE_FLAGS = \
+ $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+
+SUFFIXES = .s .S .asm
+
+# *.s are not preprocessed at all.
+.s.o:
+ $(CCAS) $(COMPILE_FLAGS) $<
+.s.obj:
+ $(CCAS) $(COMPILE_FLAGS) `cygpath -w $<`
+.s.lo:
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) $<
+
+# *.S are preprocessed with CPP.
+.S.o:
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.S.obj:
+ $(CPP) $(COMPILE_FLAGS) `cygpath -w $<` | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+
+# We have to rebuild the static object file without passing -DPIC to
+# preprocessor. The overhead cost is one extra assemblation. FIXME:
+# Teach libtool how to assemble with a preprocessor pass (CPP or m4).
+
+.S.lo:
+ $(CPP) $(COMPILE_FLAGS) -DPIC $< | grep -v '^#' >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
+
+# *.m4 are preprocessed with m4.
+.asm.o:
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.obj:
+ $(M4) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.lo:
+ $(M4) -DPIC -DOPERATION_$* $< >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
diff --git a/rts/gmp/mpn/Makefile.in b/rts/gmp/mpn/Makefile.in
new file mode 100644
index 0000000000..59ee958c92
--- /dev/null
+++ b/rts/gmp/mpn/Makefile.in
@@ -0,0 +1,472 @@
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+
+@SET_MAKE@
+build_alias = @build_alias@
+build_triplet = @build@
+host_alias = @host_alias@
+host_triplet = @host@
+target_alias = @target_alias@
+target_triplet = @target@
+AMDEP = @AMDEP@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+EXEEXT = @EXEEXT@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+gmp_srclinks = @gmp_srclinks@
+install_sh = @install_sh@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+SUBDIRS =
+
+CPP = @CPP@
+
+# -DOPERATION_$* tells multi-function files which function to produce.
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+GENERIC_SOURCES = mp_bases.c
+OFILES = @mpn_objects@
+
+noinst_LTLIBRARIES = libmpn.la
+libmpn_la_SOURCES = $(GENERIC_SOURCES)
+libmpn_la_LIBADD = $(OFILES)
+libmpn_la_DEPENDENCIES = $(OFILES)
+
+TARG_DIST = a29k alpha arm clipper cray generic hppa i960 lisp m68k m88k \
+ mips2 mips3 ns32k pa64 pa64w power powerpc32 powerpc64 pyr sh sparc32 \
+ sparc64 thumb vax x86 z8000 z8000x
+
+
+EXTRA_DIST = underscore.h asm-defs.m4 $(TARG_DIST)
+
+# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP?
+COMPILE_FLAGS = \
+ $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+
+
+SUFFIXES = .s .S .asm
+subdir = mpn
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = ../config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I..
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+libmpn_la_LDFLAGS =
+am_libmpn_la_OBJECTS = mp_bases.lo
+libmpn_la_OBJECTS = $(am_libmpn_la_OBJECTS)
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CFLAGS = @CFLAGS@
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libmpn_la_SOURCES)
+DIST_COMMON = README Makefile.am Makefile.in
+
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+GZIP_ENV = --best
+depcomp =
+SOURCES = $(libmpn_la_SOURCES)
+OBJECTS = $(am_libmpn_la_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .S .asm .c .lo .o .obj .s
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu mpn/Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+mostlyclean-noinstLTLIBRARIES:
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+
+distclean-noinstLTLIBRARIES:
+
+maintainer-clean-noinstLTLIBRARIES:
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+ -rm -f *.$(OBJEXT)
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+
+maintainer-clean-libtool:
+
+libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES)
+ $(LINK) $(libmpn_la_LDFLAGS) $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
+.c.o:
+ $(COMPILE) -c $<
+.c.obj:
+ $(COMPILE) -c `cygpath -w $<`
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+
+all-recursive install-data-recursive install-exec-recursive \
+installdirs-recursive install-recursive uninstall-recursive \
+check-recursive installcheck-recursive info-recursive dvi-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \
+ rev="$$subdir $$rev"; \
+ if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \
+ done; \
+ test "$$dot_seen" = "no" && rev=". $$rev"; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \
+ || etags $(ETAGS_ARGS) $$tags $$unique $(LISP)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+
+distdir: $(DISTFILES)
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pR $$d/$$file $(distdir); \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+ for subdir in $(SUBDIRS); do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(top_distdir) distdir=../$(distdir)/$$subdir distdir) \
+ || exit 1; \
+ fi; \
+ done
+info-am:
+info: info-recursive
+dvi-am:
+dvi: dvi-recursive
+check-am: all-am
+check: check-recursive
+installcheck-am:
+installcheck: installcheck-recursive
+install-exec-am:
+install-exec: install-exec-recursive
+
+install-data-am:
+install-data: install-data-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-recursive
+uninstall-am:
+uninstall: uninstall-recursive
+all-am: Makefile $(LTLIBRARIES)
+all-redirect: all-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
+installdirs: installdirs-recursive
+installdirs-am:
+
+
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+
+maintainer-clean-generic:
+ -rm -f Makefile.in
+mostlyclean-am: mostlyclean-noinstLTLIBRARIES mostlyclean-compile \
+ mostlyclean-libtool mostlyclean-tags \
+ mostlyclean-generic
+
+mostlyclean: mostlyclean-recursive
+
+clean-am: clean-noinstLTLIBRARIES clean-compile clean-libtool \
+ clean-tags clean-generic mostlyclean-am
+
+clean: clean-recursive
+
+distclean-am: distclean-noinstLTLIBRARIES distclean-compile \
+ distclean-libtool distclean-tags distclean-generic \
+ clean-am
+ -rm -f libtool
+
+distclean: distclean-recursive
+
+maintainer-clean-am: maintainer-clean-noinstLTLIBRARIES \
+ maintainer-clean-compile maintainer-clean-libtool \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-recursive
+
+.PHONY: mostlyclean-noinstLTLIBRARIES distclean-noinstLTLIBRARIES \
+clean-noinstLTLIBRARIES maintainer-clean-noinstLTLIBRARIES \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile mostlyclean-libtool distclean-libtool \
+clean-libtool maintainer-clean-libtool install-recursive \
+uninstall-recursive install-data-recursive uninstall-data-recursive \
+install-exec-recursive uninstall-exec-recursive installdirs-recursive \
+uninstalldirs-recursive all-recursive check-recursive \
+installcheck-recursive info-recursive dvi-recursive \
+mostlyclean-recursive distclean-recursive clean-recursive \
+maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
+distclean-tags clean-tags maintainer-clean-tags distdir info-am info \
+dvi-am dvi check check-am installcheck-am installcheck install-exec-am \
+install-exec install-data-am install-data install-am install \
+uninstall-am uninstall all-redirect all-am all install-strip \
+installdirs-am installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+# *.s are not preprocessed at all.
+.s.o:
+ $(CCAS) $(COMPILE_FLAGS) $<
+.s.obj:
+ $(CCAS) $(COMPILE_FLAGS) `cygpath -w $<`
+.s.lo:
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) $<
+
+# *.S are preprocessed with CPP.
+.S.o:
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.S.obj:
+ $(CPP) $(COMPILE_FLAGS) `cygpath -w $<` | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+
+# We have to rebuild the static object file without passing -DPIC to
+# preprocessor. The overhead cost is one extra assemblation. FIXME:
+# Teach libtool how to assemble with a preprocessor pass (CPP or m4).
+
+.S.lo:
+ $(CPP) $(COMPILE_FLAGS) -DPIC $< | grep -v '^#' >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
+
+# *.m4 are preprocessed with m4.
+.asm.o:
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.obj:
+ $(M4) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.lo:
+ $(M4) -DPIC -DOPERATION_$* $< >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rts/gmp/mpn/README b/rts/gmp/mpn/README
new file mode 100644
index 0000000000..7453c9d03e
--- /dev/null
+++ b/rts/gmp/mpn/README
@@ -0,0 +1,13 @@
+This directory contains all code for the mpn layer of GMP.
+
+Most subdirectories contain machine-dependent code, written in assembly or C.
+The `generic' subdirectory contains default code, used when there is no
+machine-dependent replacement for a particular machine.
+
+There is one subdirectory for each ISA family. Note that e.g., 32-bit SPARC
+and 64-bit SPARC are very different ISA's, and thus cannot share any code.
+
+A particular compile will only use code from one subdirectory, and the
+`generic' subdirectory. The ISA-specific subdirectories contain hierachies of
+directories for various architecture variants and implementations; the
+top-most level contains code that runs correctly on all variants.
diff --git a/rts/gmp/mpn/a29k/add_n.s b/rts/gmp/mpn/a29k/add_n.s
new file mode 100644
index 0000000000..e3ee6dfa60
--- /dev/null
+++ b/rts/gmp/mpn/a29k/add_n.s
@@ -0,0 +1,120 @@
+; 29000 __gmpn_add -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers. gr116 is used for this purpose. Note that
+; gr116==0 means that carry should be set.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_add_n
+ .word 0x60000
+___gmpn_add_n:
+ srl gr117,lr5,3
+ sub gr118,gr117,1
+ jmpt gr118,Ltail
+ constn gr116,-1 ; init cy reg
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+ mtsrim cr,(8-1)
+ loadm 0,0,gr104,lr4
+ add lr4,lr4,32
+
+ subr gr116,gr116,0 ; restore carry
+ addc gr96,gr96,gr104
+ addc gr97,gr97,gr105
+ addc gr98,gr98,gr106
+ addc gr99,gr99,gr107
+ addc gr100,gr100,gr108
+ addc gr101,gr101,gr109
+ addc gr102,gr102,gr110
+ addc gr103,gr103,gr111
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr96,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+ and lr5,lr5,(8-1)
+Ltail: sub gr118,lr5,1 ; count for CR
+ jmpt gr118,Lend
+ sub gr117,lr5,2 ; count for jmpfdec
+
+ mtsr cr,gr118
+ loadm 0,0,gr96,lr3
+ mtsr cr,gr118
+ loadm 0,0,gr104,lr4
+
+ subr gr116,gr116,0 ; restore carry
+
+ jmpfdec gr117,L1
+ addc gr96,gr96,gr104
+ jmp Lstore
+ mtsr cr,gr118
+L1: jmpfdec gr117,L2
+ addc gr97,gr97,gr105
+ jmp Lstore
+ mtsr cr,gr118
+L2: jmpfdec gr117,L3
+ addc gr98,gr98,gr106
+ jmp Lstore
+ mtsr cr,gr118
+L3: jmpfdec gr117,L4
+ addc gr99,gr99,gr107
+ jmp Lstore
+ mtsr cr,gr118
+L4: jmpfdec gr117,L5
+ addc gr100,gr100,gr108
+ jmp Lstore
+ mtsr cr,gr118
+L5: jmpfdec gr117,L6
+ addc gr101,gr101,gr109
+ jmp Lstore
+ mtsr cr,gr118
+L6: addc gr102,gr102,gr110
+
+Lstore: storem 0,0,gr96,lr2
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+Lend: jmpi lr0
+ add gr96,gr116,1
diff --git a/rts/gmp/mpn/a29k/addmul_1.s b/rts/gmp/mpn/a29k/addmul_1.s
new file mode 100644
index 0000000000..f51b6d7af6
--- /dev/null
+++ b/rts/gmp/mpn/a29k/addmul_1.s
@@ -0,0 +1,113 @@
+; 29000 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; add the product to a second limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size lr4
+; s2_limb lr5
+
+ .cputype 29050
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_addmul_1
+ .word 0x60000
+___gmpn_addmul_1:
+ sub lr4,lr4,8
+ jmpt lr4,Ltail
+ const gr120,0 ; init cylimb reg
+
+ srl gr117,lr4,3 ; divide by 8
+ sub gr117,gr117,1 ; count for jmpfdec
+
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+
+ multiplu gr104,gr96,lr5
+ multmu gr96,gr96,lr5
+ multiplu gr105,gr97,lr5
+ multmu gr97,gr97,lr5
+ multiplu gr106,gr98,lr5
+ multmu gr98,gr98,lr5
+ multiplu gr107,gr99,lr5
+ multmu gr99,gr99,lr5
+ multiplu gr108,gr100,lr5
+ multmu gr100,gr100,lr5
+ multiplu gr109,gr101,lr5
+ multmu gr101,gr101,lr5
+ multiplu gr110,gr102,lr5
+ multmu gr102,gr102,lr5
+ multiplu gr111,gr103,lr5
+ multmu gr103,gr103,lr5
+
+ add gr104,gr104,gr120
+ addc gr105,gr105,gr96
+ addc gr106,gr106,gr97
+ addc gr107,gr107,gr98
+ addc gr108,gr108,gr99
+ addc gr109,gr109,gr100
+ addc gr110,gr110,gr101
+ addc gr111,gr111,gr102
+ addc gr120,gr103,0
+
+ mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr2
+
+ add gr104,gr96,gr104
+ addc gr105,gr97,gr105
+ addc gr106,gr98,gr106
+ addc gr107,gr99,gr107
+ addc gr108,gr100,gr108
+ addc gr109,gr101,gr109
+ addc gr110,gr102,gr110
+ addc gr111,gr103,gr111
+ addc gr120,gr120,0
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr104,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+Ltail: and lr4,lr4,(8-1)
+ sub gr118,lr4,1 ; count for CR
+ jmpt gr118,Lend
+ sub lr4,lr4,2
+ sub lr2,lr2,4 ; offset res_ptr by one limb
+
+Loop2: load 0,0,gr116,lr3
+ add lr3,lr3,4
+ multiplu gr117,gr116,lr5
+ multmu gr118,gr116,lr5
+ add lr2,lr2,4
+ load 0,0,gr119,lr2
+ add gr117,gr117,gr120
+ addc gr118,gr118,0
+ add gr117,gr117,gr119
+ store 0,0,gr117,lr2
+ jmpfdec lr4,Loop2
+ addc gr120,gr118,0
+
+Lend: jmpi lr0
+ or gr96,gr120,0 ; copy
diff --git a/rts/gmp/mpn/a29k/lshift.s b/rts/gmp/mpn/a29k/lshift.s
new file mode 100644
index 0000000000..93e1917127
--- /dev/null
+++ b/rts/gmp/mpn/a29k/lshift.s
@@ -0,0 +1,93 @@
+; 29000 __gmpn_lshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_lshift
+ .word 0x60000
+___gmpn_lshift:
+ sll gr116,lr4,2
+ add lr3,gr116,lr3
+ add lr2,gr116,lr2
+ sub lr3,lr3,4
+ load 0,0,gr119,lr3
+
+ subr gr116,lr5,32
+ srl gr96,gr119,gr116 ; return value
+ sub lr4,lr4,1 ; actual loop count is SIZE - 1
+
+ srl gr117,lr4,3 ; chuck count = (actual count) / 8
+ cpeq gr118,gr117,0
+ jmpt gr118,Ltail
+ mtsr fc,lr5
+
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: sub lr3,lr3,32
+ mtsrim cr,(8-1)
+ loadm 0,0,gr100,lr3
+
+ extract gr109,gr119,gr107
+ extract gr108,gr107,gr106
+ extract gr107,gr106,gr105
+ extract gr106,gr105,gr104
+ extract gr105,gr104,gr103
+ extract gr104,gr103,gr102
+ extract gr103,gr102,gr101
+ extract gr102,gr101,gr100
+
+ sub lr2,lr2,32
+ mtsrim cr,(8-1)
+ storem 0,0,gr102,lr2
+ jmpfdec gr117,Loop
+ or gr119,gr100,0
+
+; Code for the last up-to-7 limbs.
+
+ and lr4,lr4,(8-1)
+Ltail: cpeq gr118,lr4,0
+ jmpt gr118,Lend
+ sub lr4,lr4,2 ; count for jmpfdec
+
+Loop2: sub lr3,lr3,4
+ load 0,0,gr116,lr3
+ extract gr117,gr119,gr116
+ sub lr2,lr2,4
+ store 0,0,gr117,lr2
+ jmpfdec lr4,Loop2
+ or gr119,gr116,0
+
+Lend: extract gr117,gr119,0
+ sub lr2,lr2,4
+ jmpi lr0
+ store 0,0,gr117,lr2
diff --git a/rts/gmp/mpn/a29k/mul_1.s b/rts/gmp/mpn/a29k/mul_1.s
new file mode 100644
index 0000000000..6bcf7ce0cf
--- /dev/null
+++ b/rts/gmp/mpn/a29k/mul_1.s
@@ -0,0 +1,97 @@
+; 29000 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size lr4
+; s2_limb lr5
+
+ .cputype 29050
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_mul_1
+ .word 0x60000
+___gmpn_mul_1:
+ sub lr4,lr4,8
+ jmpt lr4,Ltail
+ const gr120,0 ; init cylimb reg
+
+ srl gr117,lr4,3 ; divide by 8
+ sub gr117,gr117,1 ; count for jmpfdec
+
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+
+ multiplu gr104,gr96,lr5
+ multmu gr96,gr96,lr5
+ multiplu gr105,gr97,lr5
+ multmu gr97,gr97,lr5
+ multiplu gr106,gr98,lr5
+ multmu gr98,gr98,lr5
+ multiplu gr107,gr99,lr5
+ multmu gr99,gr99,lr5
+ multiplu gr108,gr100,lr5
+ multmu gr100,gr100,lr5
+ multiplu gr109,gr101,lr5
+ multmu gr101,gr101,lr5
+ multiplu gr110,gr102,lr5
+ multmu gr102,gr102,lr5
+ multiplu gr111,gr103,lr5
+ multmu gr103,gr103,lr5
+
+ add gr104,gr104,gr120
+ addc gr105,gr105,gr96
+ addc gr106,gr106,gr97
+ addc gr107,gr107,gr98
+ addc gr108,gr108,gr99
+ addc gr109,gr109,gr100
+ addc gr110,gr110,gr101
+ addc gr111,gr111,gr102
+ addc gr120,gr103,0
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr104,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+Ltail: and lr4,lr4,(8-1)
+ sub gr118,lr4,1 ; count for CR
+ jmpt gr118,Lend
+ sub lr4,lr4,2
+ sub lr2,lr2,4 ; offset res_ptr by one limb
+
+Loop2: load 0,0,gr116,lr3
+ add lr3,lr3,4
+ multiplu gr117,gr116,lr5
+ multmu gr118,gr116,lr5
+ add lr2,lr2,4
+ add gr117,gr117,gr120
+ store 0,0,gr117,lr2
+ jmpfdec lr4,Loop2
+ addc gr120,gr118,0
+
+Lend: jmpi lr0
+ or gr96,gr120,0 ; copy
diff --git a/rts/gmp/mpn/a29k/rshift.s b/rts/gmp/mpn/a29k/rshift.s
new file mode 100644
index 0000000000..ea163bff2b
--- /dev/null
+++ b/rts/gmp/mpn/a29k/rshift.s
@@ -0,0 +1,89 @@
+; 29000 __gmpn_rshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_rshift
+ .word 0x60000
+___gmpn_rshift:
+ load 0,0,gr119,lr3
+ add lr3,lr3,4
+
+ subr gr116,lr5,32
+ sll gr96,gr119,gr116 ; return value
+ sub lr4,lr4,1 ; actual loop count is SIZE - 1
+
+ srl gr117,lr4,3 ; chuck count = (actual count) / 8
+ cpeq gr118,gr117,0
+ jmpt gr118,Ltail
+ mtsr fc,gr116
+
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr100,lr3
+ add lr3,lr3,32
+
+ extract gr98,gr100,gr119
+ extract gr99,gr101,gr100
+ extract gr100,gr102,gr101
+ extract gr101,gr103,gr102
+ extract gr102,gr104,gr103
+ extract gr103,gr105,gr104
+ extract gr104,gr106,gr105
+ extract gr105,gr107,gr106
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr98,lr2
+ add lr2,lr2,32
+ jmpfdec gr117,Loop
+ or gr119,gr107,0
+
+; Code for the last up-to-7 limbs.
+
+ and lr4,lr4,(8-1)
+Ltail: cpeq gr118,lr4,0
+ jmpt gr118,Lend
+ sub lr4,lr4,2 ; count for jmpfdec
+
+Loop2: load 0,0,gr100,lr3
+ add lr3,lr3,4
+ extract gr117,gr100,gr119
+ store 0,0,gr117,lr2
+ add lr2,lr2,4
+ jmpfdec lr4,Loop2
+ or gr119,gr100,0
+
+Lend: srl gr117,gr119,lr5
+ jmpi lr0
+ store 0,0,gr117,lr2
diff --git a/rts/gmp/mpn/a29k/sub_n.s b/rts/gmp/mpn/a29k/sub_n.s
new file mode 100644
index 0000000000..c6b64c5bee
--- /dev/null
+++ b/rts/gmp/mpn/a29k/sub_n.s
@@ -0,0 +1,120 @@
+; 29000 __gmpn_sub -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers. gr116 is used for this purpose. Note that
+; gr116==0 means that carry should be set.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_sub_n
+ .word 0x60000
+___gmpn_sub_n:
+ srl gr117,lr5,3
+ sub gr118,gr117,1
+ jmpt gr118,Ltail
+ constn gr116,-1 ; init cy reg
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+ mtsrim cr,(8-1)
+ loadm 0,0,gr104,lr4
+ add lr4,lr4,32
+
+ subr gr116,gr116,0 ; restore carry
+ subc gr96,gr96,gr104
+ subc gr97,gr97,gr105
+ subc gr98,gr98,gr106
+ subc gr99,gr99,gr107
+ subc gr100,gr100,gr108
+ subc gr101,gr101,gr109
+ subc gr102,gr102,gr110
+ subc gr103,gr103,gr111
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr96,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+ and lr5,lr5,(8-1)
+Ltail: sub gr118,lr5,1 ; count for CR
+ jmpt gr118,Lend
+ sub gr117,lr5,2 ; count for jmpfdec
+
+ mtsr cr,gr118
+ loadm 0,0,gr96,lr3
+ mtsr cr,gr118
+ loadm 0,0,gr104,lr4
+
+ subr gr116,gr116,0 ; restore carry
+
+ jmpfdec gr117,L1
+ subc gr96,gr96,gr104
+ jmp Lstore
+ mtsr cr,gr118
+L1: jmpfdec gr117,L2
+ subc gr97,gr97,gr105
+ jmp Lstore
+ mtsr cr,gr118
+L2: jmpfdec gr117,L3
+ subc gr98,gr98,gr106
+ jmp Lstore
+ mtsr cr,gr118
+L3: jmpfdec gr117,L4
+ subc gr99,gr99,gr107
+ jmp Lstore
+ mtsr cr,gr118
+L4: jmpfdec gr117,L5
+ subc gr100,gr100,gr108
+ jmp Lstore
+ mtsr cr,gr118
+L5: jmpfdec gr117,L6
+ subc gr101,gr101,gr109
+ jmp Lstore
+ mtsr cr,gr118
+L6: subc gr102,gr102,gr110
+
+Lstore: storem 0,0,gr96,lr2
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+Lend: jmpi lr0
+ add gr96,gr116,1
diff --git a/rts/gmp/mpn/a29k/submul_1.s b/rts/gmp/mpn/a29k/submul_1.s
new file mode 100644
index 0000000000..ef97d8d4e5
--- /dev/null
+++ b/rts/gmp/mpn/a29k/submul_1.s
@@ -0,0 +1,116 @@
+; 29000 __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+; subtract the product from a second limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size lr4
+; s2_limb lr5
+
+ .cputype 29050
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_submul_1
+ .word 0x60000
+___gmpn_submul_1:
+ sub lr4,lr4,8
+ jmpt lr4,Ltail
+ const gr120,0 ; init cylimb reg
+
+ srl gr117,lr4,3 ; divide by 8
+ sub gr117,gr117,1 ; count for jmpfdec
+
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+
+ multiplu gr104,gr96,lr5
+ multmu gr96,gr96,lr5
+ multiplu gr105,gr97,lr5
+ multmu gr97,gr97,lr5
+ multiplu gr106,gr98,lr5
+ multmu gr98,gr98,lr5
+ multiplu gr107,gr99,lr5
+ multmu gr99,gr99,lr5
+ multiplu gr108,gr100,lr5
+ multmu gr100,gr100,lr5
+ multiplu gr109,gr101,lr5
+ multmu gr101,gr101,lr5
+ multiplu gr110,gr102,lr5
+ multmu gr102,gr102,lr5
+ multiplu gr111,gr103,lr5
+ multmu gr103,gr103,lr5
+
+ add gr104,gr104,gr120
+ addc gr105,gr105,gr96
+ addc gr106,gr106,gr97
+ addc gr107,gr107,gr98
+ addc gr108,gr108,gr99
+ addc gr109,gr109,gr100
+ addc gr110,gr110,gr101
+ addc gr111,gr111,gr102
+ addc gr120,gr103,0
+
+ mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr2
+
+ sub gr96,gr96,gr104
+ subc gr97,gr97,gr105
+ subc gr98,gr98,gr106
+ subc gr99,gr99,gr107
+ subc gr100,gr100,gr108
+ subc gr101,gr101,gr109
+ subc gr102,gr102,gr110
+ subc gr103,gr103,gr111
+
+ add gr104,gr103,gr111 ; invert carry from previus sub
+ addc gr120,gr120,0
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr96,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+Ltail: and lr4,lr4,(8-1)
+ sub gr118,lr4,1 ; count for CR
+ jmpt gr118,Lend
+ sub lr4,lr4,2
+ sub lr2,lr2,4 ; offset res_ptr by one limb
+
+Loop2: load 0,0,gr116,lr3
+ add lr3,lr3,4
+ multiplu gr117,gr116,lr5
+ multmu gr118,gr116,lr5
+ add lr2,lr2,4
+ load 0,0,gr119,lr2
+ add gr117,gr117,gr120
+ addc gr118,gr118,0
+ sub gr119,gr119,gr117
+ add gr104,gr119,gr117 ; invert carry from previus sub
+ store 0,0,gr119,lr2
+ jmpfdec lr4,Loop2
+ addc gr120,gr118,0
+
+Lend: jmpi lr0
+ or gr96,gr120,0 ; copy
diff --git a/rts/gmp/mpn/a29k/udiv.s b/rts/gmp/mpn/a29k/udiv.s
new file mode 100644
index 0000000000..fdd53a9a88
--- /dev/null
+++ b/rts/gmp/mpn/a29k/udiv.s
@@ -0,0 +1,30 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___udiv_qrnnd
+ .word 0x60000
+___udiv_qrnnd:
+ mtsr q,lr3
+ dividu gr96,lr4,lr5
+ mfsr gr116,q
+ jmpi lr0
+ store 0,0,gr116,lr2
diff --git a/rts/gmp/mpn/a29k/umul.s b/rts/gmp/mpn/a29k/umul.s
new file mode 100644
index 0000000000..7741981167
--- /dev/null
+++ b/rts/gmp/mpn/a29k/umul.s
@@ -0,0 +1,29 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___umul_ppmm
+ .word 0x50000
+___umul_ppmm:
+ multiplu gr116,lr3,lr4
+ multmu gr96,lr3,lr4
+ jmpi lr0
+ store 0,0,gr116,lr2
diff --git a/rts/gmp/mpn/alpha/README b/rts/gmp/mpn/alpha/README
new file mode 100644
index 0000000000..744260c7c5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/README
@@ -0,0 +1,224 @@
+This directory contains mpn functions optimized for DEC Alpha processors.
+
+ALPHA ASSEMBLY RULES AND REGULATIONS
+
+The `.prologue N' pseudo op marks the end of instruction that needs
+special handling by unwinding. It also says whether $27 is really
+needed for computing the gp. The `.mask M' pseudo op says which
+registers are saved on the stack, and at what offset in the frame.
+
+Cray code is very very different...
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+EV4
+
+1. This chip has very limited store bandwidth. The on-chip L1 cache is
+ write-through, and a cache line is transfered from the store buffer to
+ the off-chip L2 in as much 15 cycles on most systems. This delay hurts
+ mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift.
+
+2. Pairing is possible between memory instructions and integer arithmetic
+ instructions.
+
+3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of
+ these cycles are pipelined. Thus, multiply instructions can be issued at
+ a rate of one each 21st cycle.
+
+EV5
+
+1. The memory bandwidth of this chip seems excellent, both for loads and
+ stores. Even when the working set is larger than the on-chip L1 and L2
+ caches, the performance remain almost unaffected.
+
+2. mulq has a latency of 12 cycles and an issue rate of 1 each 8th cycle.
+ umulh has a measured latency of 14 cycles and an issue rate of 1 each
+ 10th cycle. But the exact timing is somewhat confusing.
+
+3. mpn_add_n. With 4-fold unrolling, we need 37 instructions, whereof 12
+ are memory operations. This will take at least
+ ceil(37/2) [dual issue] + 1 [taken branch] = 19 cycles
+ We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data
+ cache cycles, which should be completely hidden in the 19 issue cycles.
+ The computation is inherently serial, with these dependencies:
+
+ ldq ldq
+ \ /\
+ (or) addq |
+ |\ / \ |
+ | addq cmpult
+ \ | |
+ cmpult |
+ \ /
+ or
+
+ I.e., 3 operations are needed between carry-in and carry-out, making 12
+ cycles the absolute minimum for the 4 limbs. We could replace the `or'
+ with a cmoveq/cmovne, which could issue one cycle earlier that the `or',
+ but that might waste a cycle on EV4. The total depth remain unaffected,
+ since cmov has a latency of 2 cycles.
+
+ addq
+ / \
+ addq cmpult
+ | \
+ cmpult -> cmovne
+
+Montgomery has a slightly different way of computing carry that requires one
+less instruction, but has depth 4 (instead of the current 3). Since the
+code is currently instruction issue bound, Montgomery's idea should save us
+1/2 cycle per limb, or bring us down to a total of 17 cycles or 4.25
+cycles/limb. Unfortunately, this method will not be good for the EV6.
+
+EV6
+
+Here we have a really parallel pipeline, capable of issuing up to 4 integer
+instructions per cycle. One integer multiply instruction can issue each
+cycle. To get optimal speed, we need to pretend we are vectorizing the code,
+i.e., minimize the iterative dependencies.
+
+There are two dependencies to watch out for. 1) Address arithmetic
+dependencies, and 2) carry propagation dependencies.
+
+We can avoid serializing due to address arithmetic by unrolling the loop, so
+that addresses don't depend heavily on an index variable. Avoiding
+serializing because of carry propagation is trickier; the ultimate performance
+of the code will be determined of the number of latency cycles it takes from
+accepting carry-in to a vector point until we can generate carry-out.
+
+Most integer instructions can execute in either the L0, U0, L1, or U1
+pipelines. Shifts only execute in U0 and U1, and multiply only in U1.
+
+CMOV instructions split into two internal instructions, CMOV1 and CMOV2, but
+the execute efficiently. But CMOV split the mapping process (see pg 2-26 in
+cmpwrgd.pdf), suggesting the CMOV should always be placed as the last
+instruction of an aligned 4 instruction block (?).
+
+Perhaps the most important issue is the latency between the L0/U0 and L1/U1
+clusters; a result obtained on either cluster has an extra cycle of latency
+for consumers in the opposite cluster. Because of the dynamic nature of the
+implementation, it is hard to predict where an instruction will execute.
+
+The shift loops need (per limb):
+ 1 load (Lx pipes)
+ 1 store (Lx pipes)
+ 2 shift (Ux pipes)
+ 1 iaddlog (Lx pipes, Ux pipes)
+Obviously, since the pipes are very equally loaded, we should get 4 insn/cycle, or 1.25 cycles/limb.
+
+For mpn_add_n, we currently have
+ 2 load (Lx pipes)
+ 1 store (Lx pipes)
+ 5 iaddlog (Lx pipes, Ux pipes)
+
+Again, we have a perfect balance and will be limited by carry propagation
+delays, currently three cycles. The superoptimizer indicates that ther
+might be sequences that--using a final cmov--have a carry propagation delay
+of just two. Montgomery's subtraction sequence could perhaps be used, by
+complementing some operands. All in all, we should get down to 2 cycles
+without much problems.
+
+For mpn_mul_1, we could do, just like for mpn_add_n:
+ not newlo,notnewlo
+ addq cylimb,newlo,newlo || cmpult cylimb,notnewlo,cyout
+ addq cyout,newhi,cylimb
+and get 2-cycle carry propagation. The instructions needed will be
+ 1 ld (Lx pipes)
+ 1 st (Lx pipes)
+ 2 mul (U1 pipe)
+ 4 iaddlog (Lx pipes, Ux pipes)
+issue1: addq not mul ld
+issue2: cmpult addq mul st
+Conclusion: no cluster delays and 2-cycle carry delays will give us 2 cycles/limb!
+
+Last, we have mpn_addmul_1. Almost certainly, we will get down to 3
+cycles/limb, which would be absolutely awesome.
+
+Old, perhaps obsolete addmul_1 dependency diagram (needs 175 columns wide screen):
+
+ i
+ s
+ s i
+ u n
+ e s
+ d t
+ r
+ i u
+l n c
+i s t
+v t i
+e r o
+ u n
+v c
+a t t
+l i y
+u o p
+e n e
+s s s
+ issue
+ in
+ cycle
+ -1 ldq
+ / \
+ 0 | \
+ | \
+ 1 | |
+ | |
+ 2 | | ldq
+ | | / \
+ 3 | mulq | \
+ | \ | \
+ 4 umulh \ | |
+ | | | |
+ 5 | | | | ldq
+ | | | | / \
+ 4calm 6 | | ldq | mulq | \
+ | | / | \ | \
+ 4casm 7 | | / umulh \ | |
+6 | || | | | |
+ 3aal 8 | || | | | | ldq
+7 | || | | | | / \
+ 4calm 9 | || | | ldq | mulq | \
+9 | || | | / | \ | \
+ 4casm 10 | || | | / umulh \ | |
+9 | || | || | | | |
+ 3aal 11 | addq | || | | | | ldq
+9 | // \ | || | | | | / \
+ 4calm 12 \ cmpult addq<-cy | || | | ldq | mulq | \
+13 \ / // \ | || | | / | \ | \
+ 4casm 13 addq cmpult stq | || | | / umulh \ | |
+11 \ / | || | || | | | |
+ 3aal 14 addq | addq | || | | | | ldq
+10 \ | // \ | || | | | | / \
+ 4calm 15 cy ----> \ cmpult addq<-cy | || | | ldq | mulq | \
+13 \ / // \ | || | | / | \ | \
+ 4casm 16 addq cmpult stq | || | | / umulh \ | |
+11 \ / | || | || | | | |
+ 3aal 17 addq | addq | || | | | |
+10 \ | // \ | || | | | |
+ 4calm 18 cy ----> \ cmpult addq<-cy | || | | ldq | mulq
+13 \ / // \ | || | | / | \
+ 4casm 19 addq cmpult stq | || | | / umulh \
+11 \ / | || | || | |
+ 3aal 20 addq | addq | || | |
+10 \ | // \ | || | |
+ 4calm 21 cy ----> \ cmpult addq<-cy | || | | ldq
+ \ / // \ | || | | /
+ 22 addq cmpult stq | || | | /
+ \ / | || | ||
+ 23 addq | addq | ||
+ \ | // \ | ||
+ 24 cy ----> \ cmpult addq<-cy | ||
+ \ / // \ | ||
+ 25 addq cmpult stq | ||
+ \ / | ||
+ 26 addq | addq
+ \ | // \
+ 27 cy ----> \ cmpult addq<-cy
+ \ / // \
+ 28 addq cmpult stq
+ \ /
+As many as 6 consecutive points will be under execution simultaneously, or if we addq
+schedule loads even further away, maybe 7 or 8. But the number of live quantities \
+is reasonable, and can easily be satisfied. cy ---->
diff --git a/rts/gmp/mpn/alpha/add_n.asm b/rts/gmp/mpn/alpha/add_n.asm
new file mode 100644
index 0000000000..08d6a9f7b8
--- /dev/null
+++ b/rts/gmp/mpn/alpha/add_n.asm
@@ -0,0 +1,114 @@
+dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ ldq r3,0(r17)
+ ldq r4,0(r18)
+
+ subq r19,1,r19
+ and r19,4-1,r2 C number of limbs in first loop
+ bis r31,r31,r0
+ beq r2,$L0 C if multiple of 4 limbs, skip first loop
+
+ subq r19,r2,r19
+
+$Loop0: subq r2,1,r2
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ addq r17,8,r17
+ addq r18,8,r18
+ bis r5,r5,r3
+ bis r6,r6,r4
+ addq r16,8,r16
+ bne r2,$Loop0
+
+$L0: beq r19,$Lend
+
+ ALIGN(8)
+$Loop: subq r19,4,r19
+
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ ldq r3,16(r17)
+ addq r6,r0,r6
+ ldq r4,16(r18)
+ cmpult r6,r0,r1
+ addq r5,r6,r6
+ cmpult r6,r5,r0
+ stq r6,8(r16)
+ bis r0,r1,r0
+
+ ldq r5,24(r17)
+ addq r4,r0,r4
+ ldq r6,24(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,16(r16)
+ bis r0,r1,r0
+
+ ldq r3,32(r17)
+ addq r6,r0,r6
+ ldq r4,32(r18)
+ cmpult r6,r0,r1
+ addq r5,r6,r6
+ cmpult r6,r5,r0
+ stq r6,24(r16)
+ bis r0,r1,r0
+
+ addq r17,32,r17
+ addq r18,32,r18
+ addq r16,32,r16
+ bne r19,$Loop
+
+$Lend: addq r4,r0,r4
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/addmul_1.asm b/rts/gmp/mpn/alpha/addmul_1.asm
new file mode 100644
index 0000000000..4ea900be6b
--- /dev/null
+++ b/rts/gmp/mpn/alpha/addmul_1.asm
@@ -0,0 +1,87 @@
+dnl Alpha __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ umulh r2,r19,r0 C r0 = prod_high
+ beq r18,$Lend1 C jump if size was == 1
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ addq r5,r3,r3
+ cmpult r3,r5,r4
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ beq r18,$Lend2 C jump if size was == 2
+
+ ALIGN(8)
+$Loop: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ subq r18,1,r18 C size--
+ umulh r2,r19,r4 C r4 = cy_limb
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5,r3,r3
+ cmpult r3,r5,r5
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ addq r5,r0,r0 C combine carries
+ bne r18,$Loop
+
+$Lend2: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ umulh r2,r19,r4 C r4 = cy_limb
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5,r3,r3
+ cmpult r3,r5,r5
+ stq r3,0(r16)
+ addq r5,r0,r0 C combine carries
+ addq r4,r0,r0 C cy_limb = prod_high + cy
+ ret r31,(r26),1
+$Lend1: addq r5,r3,r3
+ cmpult r3,r5,r5
+ stq r3,0(r16)
+ addq r0,r5,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_addmul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/cntlz.asm b/rts/gmp/mpn/alpha/cntlz.asm
new file mode 100644
index 0000000000..febb3b70d9
--- /dev/null
+++ b/rts/gmp/mpn/alpha/cntlz.asm
@@ -0,0 +1,68 @@
+dnl Alpha auxiliary for longlong.h's count_leading_zeros
+
+dnl Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl DISCUSSION:
+
+dnl Other methods have been tried, and using a 128-entry table actually trims
+dnl about 10% of the execution time (on a 21164) when the table is in the L1
+dnl cache. But under non-benchmarking conditions, the table will hardly be in
+dnl the L1 cache. Tricky bit-fiddling methods with multiplies and magic tables
+dnl are also possible, but they require many more instructions than the current
+dnl code. (But for count_trailing_zeros, such tricks are beneficial.)
+dnl Finally, converting to floating-point and extracting the exponent is much
+dnl slower.
+
+ASM_START()
+PROLOGUE(MPN(count_leading_zeros))
+ bis r31,63,r0 C initialize partial result count
+
+ srl r16,32,r1 C shift down 32 steps -> r1
+ cmovne r1,r1,r16 C select r1 if non-zero
+ cmovne r1,31,r0 C if r1 is nonzero choose smaller count
+
+ srl r16,16,r1 C shift down 16 steps -> r1
+ subq r0,16,r2 C generate new partial result count
+ cmovne r1,r1,r16 C choose new r1 if non-zero
+ cmovne r1,r2,r0 C choose new count if r1 was non-zero
+
+ srl r16,8,r1
+ subq r0,8,r2
+ cmovne r1,r1,r16
+ cmovne r1,r2,r0
+
+ srl r16,4,r1
+ subq r0,4,r2
+ cmovne r1,r1,r16
+ cmovne r1,r2,r0
+
+ srl r16,2,r1
+ subq r0,2,r2
+ cmovne r1,r1,r16
+ cmovne r1,r2,r0
+
+ srl r16,1,r1 C extract bit 1
+ subq r0,r1,r0 C subtract it from partial result
+
+ ret r31,(r26),1
+EPILOGUE(MPN(count_leading_zeros))
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/default.m4 b/rts/gmp/mpn/alpha/default.m4
new file mode 100644
index 0000000000..5f4c48dc73
--- /dev/null
+++ b/rts/gmp/mpn/alpha/default.m4
@@ -0,0 +1,77 @@
+divert(-1)
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+define(`ASM_START',
+ `
+ .set noreorder
+ .set noat')
+
+define(`X',`0x$1')
+define(`FLOAT64',
+ `
+ .align 3
+$1: .t_floating $2')
+
+define(`PROLOGUE',
+ `
+ .text
+ .align 3
+ .globl $1
+ .ent $1
+$1:
+ .frame r30,0,r26
+ .prologue 0')
+
+define(`PROLOGUE_GP',
+ `
+ .text
+ .align 3
+ .globl $1
+ .ent $1
+$1:
+ ldgp r29,0(r27)
+ .frame r30,0,r26
+ .prologue 1')
+
+define(`EPILOGUE',
+ `
+ .end $1')
+
+dnl Map register names r0, r1, etc, to `$0', `$1', etc.
+dnl This is needed on all systems but Unicos
+forloop(i,0,31,
+`define(`r'i,``$''i)'
+)
+forloop(i,0,31,
+`define(`f'i,``$f''i)'
+)
+
+define(`DATASTART',
+ `dnl
+ DATA
+$1:')
+define(`DATAEND',`dnl')
+
+define(`ASM_END',`dnl')
+
+divert
diff --git a/rts/gmp/mpn/alpha/ev5/add_n.asm b/rts/gmp/mpn/alpha/ev5/add_n.asm
new file mode 100644
index 0000000000..716d6404ae
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/add_n.asm
@@ -0,0 +1,143 @@
+dnl Alpha EV5 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ bis r31,r31,r25 C clear cy
+ subq r19,4,r19 C decr loop cnt
+ blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ ldq r1,8(r18)
+ ldq r5,8(r17)
+ addq r17,32,r17 C update s1_ptr
+ ldq r2,16(r18)
+ addq r0,r4,r20 C 1st main add
+ ldq r3,24(r18)
+ subq r19,4,r19 C decr loop cnt
+ ldq r6,-16(r17)
+ cmpult r20,r0,r25 C compute cy from last add
+ ldq r7,-8(r17)
+ addq r1,r5,r28 C 2nd main add
+ addq r18,32,r18 C update s2_ptr
+ addq r28,r25,r21 C 2nd carry add
+ cmpult r28,r5,r8 C compute cy from last add
+ blt r19,$Lend1 C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+ ALIGN(16)
+$Loop: cmpult r21,r28,r25 C compute cy from last add
+ ldq r0,0(r18)
+ bis r8,r25,r25 C combine cy from the two adds
+ ldq r1,8(r18)
+ addq r2,r6,r28 C 3rd main add
+ ldq r4,0(r17)
+ addq r28,r25,r22 C 3rd carry add
+ ldq r5,8(r17)
+ cmpult r28,r6,r8 C compute cy from last add
+ cmpult r22,r28,r25 C compute cy from last add
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two adds
+ stq r21,8(r16)
+ addq r3,r7,r28 C 4th main add
+ addq r28,r25,r23 C 4th carry add
+ cmpult r28,r7,r8 C compute cy from last add
+ cmpult r23,r28,r25 C compute cy from last add
+ addq r17,32,r17 C update s1_ptr
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,32,r16 C update res_ptr
+ addq r0,r4,r28 C 1st main add
+ ldq r2,16(r18)
+ addq r25,r28,r20 C 1st carry add
+ ldq r3,24(r18)
+ cmpult r28,r4,r8 C compute cy from last add
+ ldq r6,-16(r17)
+ cmpult r20,r28,r25 C compute cy from last add
+ ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two adds
+ subq r19,4,r19 C decr loop cnt
+ stq r22,-16(r16)
+ addq r1,r5,r28 C 2nd main add
+ stq r23,-8(r16)
+ addq r25,r28,r21 C 2nd carry add
+ addq r18,32,r18 C update s2_ptr
+ cmpult r28,r5,r8 C compute cy from last add
+ bge r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1: cmpult r21,r28,r25 C compute cy from last add
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r2,r6,r28 C 3rd main add
+ addq r28,r25,r22 C 3rd carry add
+ cmpult r28,r6,r8 C compute cy from last add
+ cmpult r22,r28,r25 C compute cy from last add
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two adds
+ stq r21,8(r16)
+ addq r3,r7,r28 C 4th main add
+ addq r28,r25,r23 C 4th carry add
+ cmpult r28,r7,r8 C compute cy from last add
+ cmpult r23,r28,r25 C compute cy from last add
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,32,r16 C update res_ptr
+ stq r22,-16(r16)
+ stq r23,-8(r16)
+$Lend2: addq r19,4,r19 C restore loop cnt
+ beq r19,$Lret
+C Start software pipeline for 2nd loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ subq r19,1,r19
+ beq r19,$Lend0
+C 2nd loop handles remaining 1-3 limbs
+ ALIGN(16)
+$Loop0: addq r0,r4,r28 C main add
+ ldq r0,8(r18)
+ cmpult r28,r4,r8 C compute cy from last add
+ ldq r4,8(r17)
+ addq r28,r25,r20 C carry add
+ addq r18,8,r18
+ addq r17,8,r17
+ stq r20,0(r16)
+ cmpult r20,r28,r25 C compute cy from last add
+ subq r19,1,r19 C decr loop cnt
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,8,r16
+ bne r19,$Loop0
+$Lend0: addq r0,r4,r28 C main add
+ addq r28,r25,r20 C carry add
+ cmpult r28,r4,r8 C compute cy from last add
+ cmpult r20,r28,r25 C compute cy from last add
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two adds
+
+$Lret: bis r25,r31,r0 C return cy
+ ret r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/lshift.asm b/rts/gmp/mpn/alpha/ev5/lshift.asm
new file mode 100644
index 0000000000..cb181dda66
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/lshift.asm
@@ -0,0 +1,169 @@
+dnl Alpha EV5 __gmpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ s8addq r18,r17,r17 C make r17 point at end of s1
+ ldq r4,-8(r17) C load first limb
+ subq r31,r19,r20
+ s8addq r18,r16,r16 C make r16 point at end of RES
+ subq r18,1,r18
+ and r18,4-1,r28 C number of limbs in first loop
+ srl r4,r20,r0 C compute function result
+
+ beq r28,$L0
+ subq r18,r28,r18
+
+ ALIGN(8)
+$Loop0: ldq r3,-16(r17)
+ subq r16,8,r16
+ sll r4,r19,r5
+ subq r17,8,r17
+ subq r28,1,r28
+ srl r3,r20,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,0(r16)
+ bne r28,$Loop0
+
+$L0: sll r4,r19,r24
+ beq r18,$Lend
+C warm up phase 1
+ ldq r1,-16(r17)
+ subq r18,4,r18
+ ldq r2,-24(r17)
+ ldq r3,-32(r17)
+ ldq r4,-40(r17)
+ beq r18,$Lend1
+C warm up phase 2
+ srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ ldq r1,-48(r17)
+ sll r2,r19,r22
+ ldq r2,-56(r17)
+ srl r3,r20,r5
+ bis r7,r24,r7
+ sll r3,r19,r23
+ bis r8,r21,r8
+ srl r4,r20,r6
+ ldq r3,-64(r17)
+ sll r4,r19,r24
+ ldq r4,-72(r17)
+ subq r18,4,r18
+ beq r18,$Lend2
+ ALIGN(16)
+C main loop
+$Loop: stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+
+ srl r1,r20,r7
+ subq r18,4,r18
+ sll r1,r19,r21
+ unop C ldq r31,-96(r17)
+
+ srl r2,r20,r8
+ ldq r1,-80(r17)
+ sll r2,r19,r22
+ ldq r2,-88(r17)
+
+ stq r5,-24(r16)
+ bis r7,r24,r7
+ stq r6,-32(r16)
+ bis r8,r21,r8
+
+ srl r3,r20,r5
+ unop C ldq r31,-96(r17)
+ sll r3,r19,r23
+ subq r16,32,r16
+
+ srl r4,r20,r6
+ ldq r3,-96(r17)
+ sll r4,r19,r24
+ ldq r4,-104(r17)
+
+ subq r17,32,r17
+ bne r18,$Loop
+C cool down phase 2/1
+$Lend2: stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+ srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ sll r2,r19,r22
+ stq r5,-24(r16)
+ bis r7,r24,r7
+ stq r6,-32(r16)
+ bis r8,r21,r8
+ srl r3,r20,r5
+ sll r3,r19,r23
+ srl r4,r20,r6
+ sll r4,r19,r24
+C cool down phase 2/2
+ stq r7,-40(r16)
+ bis r5,r22,r5
+ stq r8,-48(r16)
+ bis r6,r23,r6
+ stq r5,-56(r16)
+ stq r6,-64(r16)
+C cool down phase 2/3
+ stq r24,-72(r16)
+ ret r31,(r26),1
+
+C cool down phase 1/1
+$Lend1: srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ sll r2,r19,r22
+ srl r3,r20,r5
+ bis r7,r24,r7
+ sll r3,r19,r23
+ bis r8,r21,r8
+ srl r4,r20,r6
+ sll r4,r19,r24
+C cool down phase 1/2
+ stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+ stq r5,-24(r16)
+ stq r6,-32(r16)
+ stq r24,-40(r16)
+ ret r31,(r26),1
+
+$Lend: stq r24,-8(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/rshift.asm b/rts/gmp/mpn/alpha/ev5/rshift.asm
new file mode 100644
index 0000000000..9940d83fad
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/rshift.asm
@@ -0,0 +1,167 @@
+dnl Alpha EV5 __gmpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ldq r4,0(r17) C load first limb
+ subq r31,r19,r20
+ subq r18,1,r18
+ and r18,4-1,r28 C number of limbs in first loop
+ sll r4,r20,r0 C compute function result
+
+ beq r28,$L0
+ subq r18,r28,r18
+
+ ALIGN(8)
+$Loop0: ldq r3,8(r17)
+ addq r16,8,r16
+ srl r4,r19,r5
+ addq r17,8,r17
+ subq r28,1,r28
+ sll r3,r20,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,-8(r16)
+ bne r28,$Loop0
+
+$L0: srl r4,r19,r24
+ beq r18,$Lend
+C warm up phase 1
+ ldq r1,8(r17)
+ subq r18,4,r18
+ ldq r2,16(r17)
+ ldq r3,24(r17)
+ ldq r4,32(r17)
+ beq r18,$Lend1
+C warm up phase 2
+ sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ ldq r1,40(r17)
+ srl r2,r19,r22
+ ldq r2,48(r17)
+ sll r3,r20,r5
+ bis r7,r24,r7
+ srl r3,r19,r23
+ bis r8,r21,r8
+ sll r4,r20,r6
+ ldq r3,56(r17)
+ srl r4,r19,r24
+ ldq r4,64(r17)
+ subq r18,4,r18
+ beq r18,$Lend2
+ ALIGN(16)
+C main loop
+$Loop: stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+
+ sll r1,r20,r7
+ subq r18,4,r18
+ srl r1,r19,r21
+ unop C ldq r31,-96(r17)
+
+ sll r2,r20,r8
+ ldq r1,72(r17)
+ srl r2,r19,r22
+ ldq r2,80(r17)
+
+ stq r5,16(r16)
+ bis r7,r24,r7
+ stq r6,24(r16)
+ bis r8,r21,r8
+
+ sll r3,r20,r5
+ unop C ldq r31,-96(r17)
+ srl r3,r19,r23
+ addq r16,32,r16
+
+ sll r4,r20,r6
+ ldq r3,88(r17)
+ srl r4,r19,r24
+ ldq r4,96(r17)
+
+ addq r17,32,r17
+ bne r18,$Loop
+C cool down phase 2/1
+$Lend2: stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+ sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ srl r2,r19,r22
+ stq r5,16(r16)
+ bis r7,r24,r7
+ stq r6,24(r16)
+ bis r8,r21,r8
+ sll r3,r20,r5
+ srl r3,r19,r23
+ sll r4,r20,r6
+ srl r4,r19,r24
+C cool down phase 2/2
+ stq r7,32(r16)
+ bis r5,r22,r5
+ stq r8,40(r16)
+ bis r6,r23,r6
+ stq r5,48(r16)
+ stq r6,56(r16)
+C cool down phase 2/3
+ stq r24,64(r16)
+ ret r31,(r26),1
+
+C cool down phase 1/1
+$Lend1: sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ srl r2,r19,r22
+ sll r3,r20,r5
+ bis r7,r24,r7
+ srl r3,r19,r23
+ bis r8,r21,r8
+ sll r4,r20,r6
+ srl r4,r19,r24
+C cool down phase 1/2
+ stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+ stq r5,16(r16)
+ stq r6,24(r16)
+ stq r24,32(r16)
+ ret r31,(r26),1
+
+$Lend: stq r24,0(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/sub_n.asm b/rts/gmp/mpn/alpha/ev5/sub_n.asm
new file mode 100644
index 0000000000..5248a2aa38
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/sub_n.asm
@@ -0,0 +1,143 @@
+dnl Alpha EV5 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl and store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ bis r31,r31,r25 C clear cy
+ subq r19,4,r19 C decr loop cnt
+ blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ ldq r1,8(r18)
+ ldq r5,8(r17)
+ addq r17,32,r17 C update s1_ptr
+ ldq r2,16(r18)
+ subq r4,r0,r20 C 1st main subtract
+ ldq r3,24(r18)
+ subq r19,4,r19 C decr loop cnt
+ ldq r6,-16(r17)
+ cmpult r4,r0,r25 C compute cy from last subtract
+ ldq r7,-8(r17)
+ subq r5,r1,r28 C 2nd main subtract
+ addq r18,32,r18 C update s2_ptr
+ subq r28,r25,r21 C 2nd carry subtract
+ cmpult r5,r1,r8 C compute cy from last subtract
+ blt r19,$Lend1 C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+ ALIGN(16)
+$Loop: cmpult r28,r25,r25 C compute cy from last subtract
+ ldq r0,0(r18)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ ldq r1,8(r18)
+ subq r6,r2,r28 C 3rd main subtract
+ ldq r4,0(r17)
+ subq r28,r25,r22 C 3rd carry subtract
+ ldq r5,8(r17)
+ cmpult r6,r2,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ stq r21,8(r16)
+ subq r7,r3,r28 C 4th main subtract
+ subq r28,r25,r23 C 4th carry subtract
+ cmpult r7,r3,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ addq r17,32,r17 C update s1_ptr
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,32,r16 C update res_ptr
+ subq r4,r0,r28 C 1st main subtract
+ ldq r2,16(r18)
+ subq r28,r25,r20 C 1st carry subtract
+ ldq r3,24(r18)
+ cmpult r4,r0,r8 C compute cy from last subtract
+ ldq r6,-16(r17)
+ cmpult r28,r25,r25 C compute cy from last subtract
+ ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r19,4,r19 C decr loop cnt
+ stq r22,-16(r16)
+ subq r5,r1,r28 C 2nd main subtract
+ stq r23,-8(r16)
+ subq r28,r25,r21 C 2nd carry subtract
+ addq r18,32,r18 C update s2_ptr
+ cmpult r5,r1,r8 C compute cy from last subtract
+ bge r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1: cmpult r28,r25,r25 C compute cy from last subtract
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r6,r2,r28 C cy add
+ subq r28,r25,r22 C 3rd main subtract
+ cmpult r6,r2,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ stq r21,8(r16)
+ subq r7,r3,r28 C cy add
+ subq r28,r25,r23 C 4th main subtract
+ cmpult r7,r3,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,32,r16 C update res_ptr
+ stq r22,-16(r16)
+ stq r23,-8(r16)
+$Lend2: addq r19,4,r19 C restore loop cnt
+ beq r19,$Lret
+C Start software pipeline for 2nd loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ subq r19,1,r19
+ beq r19,$Lend0
+C 2nd loop handles remaining 1-3 limbs
+ ALIGN(16)
+$Loop0: subq r4,r0,r28 C main subtract
+ cmpult r4,r0,r8 C compute cy from last subtract
+ ldq r0,8(r18)
+ ldq r4,8(r17)
+ subq r28,r25,r20 C carry subtract
+ addq r18,8,r18
+ addq r17,8,r17
+ stq r20,0(r16)
+ cmpult r28,r25,r25 C compute cy from last subtract
+ subq r19,1,r19 C decr loop cnt
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,8,r16
+ bne r19,$Loop0
+$Lend0: subq r4,r0,r28 C main subtract
+ subq r28,r25,r20 C carry subtract
+ cmpult r4,r0,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+
+$Lret: bis r25,r31,r0 C return cy
+ ret r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev6/addmul_1.asm b/rts/gmp/mpn/alpha/ev6/addmul_1.asm
new file mode 100644
index 0000000000..2f588626a5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev6/addmul_1.asm
@@ -0,0 +1,474 @@
+dnl Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and
+dnl exactly 3.625 cycles/limb on EV6...
+
+dnl This code was written in close cooperation with ev6 pipeline expert
+dnl Steve Root (root@toober.hlo.dec.com). Any errors are tege's fault, though.
+dnl
+dnl Register usages for unrolled loop:
+dnl 0-3 mul's
+dnl 4-7 acc's
+dnl 8-15 mul results
+dnl 20,21 carry's
+dnl 22,23 save for stores
+
+dnl Sustains 8 mul-adds in 29 cycles in the unrolled inner loop.
+
+dnl The stores can issue a cycle late so we have paired no-op's to 'catch'
+dnl them, so that further disturbance to the schedule is damped.
+
+dnl We couldn't pair the loads, because the entangled schedule of the
+dnl carry's has to happen on one side {0} of the machine. Note, the total
+dnl use of U0, and the total use of L0 (after attending to the stores).
+dnl which is part of the reason why....
+
+dnl This is a great schedule for the d_cache, a poor schedule for the
+dnl b_cache. The lockup on U0 means that any stall can't be recovered
+dnl from. Consider a ldq in L1. say that load gets stalled because it
+dnl collides with a fill from the b_Cache. On the next cycle, this load
+dnl gets priority. If first looks at L0, and goes there. The instruction
+dnl we intended for L0 gets to look at L1, which is NOT where we want
+dnl it. It either stalls 1, because it can't go in L0, or goes there, and
+dnl causes a further instruction to stall.
+
+dnl So for b_cache, we're likely going to want to put one or more cycles
+dnl back into the code! And, of course, put in prefetches. For the
+dnl accumulator, lds, intent to modify. For the multiplier, you might
+dnl want ldq, evict next, if you're not wanting to use it again soon. Use
+dnl 256 ahead of present pointer value. At a place where we have an mt
+dnl followed by a bookkeeping, put the bookkeeping in upper, and the
+dnl prefetch into lower.
+
+dnl Note, the usage of physical registers per cycle is smoothed off, as
+dnl much as possible.
+
+dnl Note, the ldq's and stq's are at the end of the quadpacks. note, we'd
+dnl like not to have a ldq or stq to preceded a conditional branch in a
+dnl quadpack. The conditional branch moves the retire pointer one cycle
+dnl later.
+
+dnl Optimization notes:
+dnl Callee-saves regs: r9 r10 r11 r12 r13 r14 r15 r26 ?r27?
+dnl Reserved regs: r29 r30 r31
+dnl Free caller-saves regs in unrolled code: r24 r25 r28
+dnl We should swap some of the callee-saves regs for some of the free
+dnl caller-saves regs, saving some overhead cycles.
+dnl Most importantly, we should write fast code for the 0-7 case.
+dnl The code we use there are for the 21164, and runs at 7 cycles/limb
+dnl on the 21264. Should not be hard, if we write specialized code for
+dnl 1-7 limbs (the one for 0 limbs should be straightforward). We then just
+dnl need a jump table indexed by the low 3 bits of the count argument.
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ cmpult r18, 8, r1
+ beq r1, $Large
+
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r18, 1, r18 C size--
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ umulh r2, r19, r0 C r0 = prod_high
+ beq r18, $Lend0b C jump if size was == 1
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r18, 1, r18 C size--
+ addq r5, r3, r3
+ cmpult r3, r5, r4
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ beq r18, $Lend0a C jump if size was == 2
+
+ ALIGN(8)
+$Loop0: mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ subq r18, 1, r18 C size--
+ umulh r2, r19, r4 C r4 = cy_limb
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r5, r0, r0 C combine carries
+ bne r18, $Loop0
+$Lend0a:
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ umulh r2, r19, r4 C r4 = cy_limb
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r5, r0, r0 C combine carries
+ addq r4, r0, r0 C cy_limb = prod_high + cy
+ ret r31, (r26), 1
+$Lend0b:
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r0, r5, r0
+ ret r31, (r26), 1
+
+$Large:
+ lda $30, -240($30)
+ stq $9, 8($30)
+ stq $10, 16($30)
+ stq $11, 24($30)
+ stq $12, 32($30)
+ stq $13, 40($30)
+ stq $14, 48($30)
+ stq $15, 56($30)
+
+ and r18, 7, r20 C count for the first loop, 0-7
+ srl r18, 3, r18 C count for unrolled loop
+ bis r31, r31, r0
+ beq r20, $Lunroll
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r20, 1, r20 C size--
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ umulh r2, r19, r0 C r0 = prod_high
+ beq r20, $Lend1b C jump if size was == 1
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r20, 1, r20 C size--
+ addq r5, r3, r3
+ cmpult r3, r5, r4
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ beq r20, $Lend1a C jump if size was == 2
+
+ ALIGN(8)
+$Loop1: mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ subq r20, 1, r20 C size--
+ umulh r2, r19, r4 C r4 = cy_limb
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r5, r0, r0 C combine carries
+ bne r20, $Loop1
+
+$Lend1a:
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ umulh r2, r19, r4 C r4 = cy_limb
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r5, r0, r0 C combine carries
+ addq r4, r0, r0 C cy_limb = prod_high + cy
+ br r31, $Lunroll
+$Lend1b:
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r0, r5, r0
+
+$Lunroll:
+ lda r17, -16(r17) C L1 bookkeeping
+ lda r16, -16(r16) C L1 bookkeeping
+ bis r0, r31, r12
+
+C ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____
+
+ ldq r2, 16(r17) C L1
+ ldq r3, 24(r17) C L1
+ lda r18, -1(r18) C L1 bookkeeping
+ ldq r6, 16(r16) C L1
+ ldq r7, 24(r16) C L1
+ ldq r0, 32(r17) C L1
+ mulq r19, r2, r13 C U1
+ ldq r1, 40(r17) C L1
+ umulh r19, r2, r14 C U1
+ mulq r19, r3, r15 C U1
+ lda r17, 64(r17) C L1 bookkeeping
+ ldq r4, 32(r16) C L1
+ ldq r5, 40(r16) C L1
+ umulh r19, r3, r8 C U1
+ ldq r2, -16(r17) C L1
+ mulq r19, r0, r9 C U1
+ ldq r3, -8(r17) C L1
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ mulq r19, r1, r11 C U1
+ cmpult r6, r13, r20 C L0 lo add => carry
+ lda r16, 64(r16) C L1 bookkeeping
+ addq r6, r12, r22 C U0 hi add => answer
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ ldq r6, -16(r16) C L1
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ ldq r7, -8(r16) C L1
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ ldq r0, 0(r17) C L1
+ mulq r19, r2, r13 C U1
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ ldq r1, 8(r17) C L1
+ umulh r19, r2, r14 C U1
+ addq r4, r9, r4 C L0 lo + acc
+ stq r22, -48(r16) C L0
+ stq r23, -40(r16) C L1
+ mulq r19, r3, r15 C U1
+ addq r8, r21, r8 C U0 hi mul + carry
+ cmpult r4, r9, r20 C L0 lo add => carry
+ addq r4, r8, r22 C U0 hi add => answer
+ ble r18, $Lend C U1 bookkeeping
+
+C ____ MAIN UNROLLED LOOP ____
+ ALIGN(16)
+$Loop:
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ ldq r4, 0(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ ldq r5, 8(r16) C L1
+
+ umulh r19, r3, r8 C U1
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ ldq r2, 16(r17) C L1
+
+ mulq r19, r0, r9 C U1
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ ldq r3, 24(r17) C L1
+
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ stq r22, -32(r16) C L0
+ stq r23, -24(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r1, r11 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r12, r21, r12 C U0 hi mul + carry
+
+ cmpult r6, r13, r20 C L0 lo add => carry
+ bis r31, r31, r31 C U1 mt
+ lda r18, -1(r18) C L1 bookkeeping
+ addq r6, r12, r22 C U0 hi add => answer
+
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ ldq r6, 16(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ ldq r7, 24(r16) C L1
+
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ ldq r0, 32(r17) C L1
+
+ mulq r19, r2, r13 C U1
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ ldq r1, 40(r17) C L1
+
+ umulh r19, r2, r14 C U1
+ addq r4, r9, r4 C U0 lo + acc
+ stq r22, -16(r16) C L0
+ stq r23, -8(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r3, r15 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r8, r21, r8 C L0 hi mul + carry
+
+ cmpult r4, r9, r20 C L0 lo add => carry
+ bis r31, r31, r31 C U1 mt
+ lda r17, 64(r17) C L1 bookkeeping
+ addq r4, r8, r22 C U0 hi add => answer
+
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ ldq r4, 32(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ ldq r5, 40(r16) C L1
+
+ umulh r19, r3, r8 C U1
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ ldq r2, -16(r17) C L1
+
+ mulq r19, r0, r9 C U1
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ ldq r3, -8(r17) C L1
+
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ stq r22, 0(r16) C L0
+ stq r23, 8(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r1, r11 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r12, r21, r12 C U0 hi mul + carry
+
+ cmpult r6, r13, r20 C L0 lo add => carry
+ bis r31, r31, r31 C U1 mt
+ lda r16, 64(r16) C L1 bookkeeping
+ addq r6, r12, r22 C U0 hi add => answer
+
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ ldq r6, -16(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ ldq r7, -8(r16) C L1
+
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ ldq r0, 0(r17) C L1
+
+ mulq r19, r2, r13 C U1
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ ldq r1, 8(r17) C L1
+
+ umulh r19, r2, r14 C U1
+ addq r4, r9, r4 C L0 lo + acc
+ stq r22, -48(r16) C L0
+ stq r23, -40(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r3, r15 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r8, r21, r8 C U0 hi mul + carry
+
+ cmpult r4, r9, r20 C L0 lo add => carry
+ addq r4, r8, r22 C U0 hi add => answer
+ bis r31, r31, r31 C L1 mt
+ bgt r18, $Loop C U1 bookkeeping
+
+C ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____
+$Lend:
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ ldq r4, 0(r16) C L1
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ ldq r5, 8(r16) C L1
+ umulh r19, r3, r8 C U1
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ mulq r19, r0, r9 C U1
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ stq r22, -32(r16) C L0
+ stq r23, -24(r16) C L1
+ mulq r19, r1, r11 C U1
+ addq r12, r21, r12 C U0 hi mul + carry
+ cmpult r6, r13, r20 C L0 lo add => carry
+ addq r6, r12, r22 C U0 hi add => answer
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ addq r4, r9, r4 C U0 lo + acc
+ stq r22, -16(r16) C L0
+ stq r23, -8(r16) C L1
+ bis r31, r31, r31 C L0 st slosh
+ addq r8, r21, r8 C L0 hi mul + carry
+ cmpult r4, r9, r20 C L0 lo add => carry
+ addq r4, r8, r22 C U0 hi add => answer
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ stq r22, 0(r16) C L0
+ stq r23, 8(r16) C L1
+ addq r12, r21, r0 C U0 hi mul + carry
+
+ ldq $9, 8($30)
+ ldq $10, 16($30)
+ ldq $11, 24($30)
+ ldq $12, 32($30)
+ ldq $13, 40($30)
+ ldq $14, 48($30)
+ ldq $15, 56($30)
+ lda $30, 240($30)
+ ret r31, (r26), 1
+EPILOGUE(mpn_addmul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev6/gmp-mparam.h b/rts/gmp/mpn/alpha/ev6/gmp-mparam.h
new file mode 100644
index 0000000000..7ea20577f8
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev6/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Generated by tuneup.c, 2000-08-02. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 47
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 70
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 94
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 101
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 33
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 70
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 29
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 33
+#endif
diff --git a/rts/gmp/mpn/alpha/gmp-mparam.h b/rts/gmp/mpn/alpha/gmp-mparam.h
new file mode 100644
index 0000000000..054ff2fe5f
--- /dev/null
+++ b/rts/gmp/mpn/alpha/gmp-mparam.h
@@ -0,0 +1,64 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the 21164 family. The 21264 will require
+ different values, since it has such quick multiplication. */
+/* Generated by tuneup.c, 2000-07-19. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 22
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 53
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 31
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 47
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 64
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 98
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 17
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 4
+#endif
diff --git a/rts/gmp/mpn/alpha/invert_limb.asm b/rts/gmp/mpn/alpha/invert_limb.asm
new file mode 100644
index 0000000000..a921b32b3f
--- /dev/null
+++ b/rts/gmp/mpn/alpha/invert_limb.asm
@@ -0,0 +1,345 @@
+dnl Alpha mpn_invert_limb -- Invert a normalized limb.
+
+dnl Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+dnl
+dnl This is based on sophie:/gmp-stuff/dbg-inv-limb.c.
+dnl The ideas are due to Peter L. Montgomery
+dnl
+dnl The table below uses 4096 bytes. The file mentioned above has an
+dnl alternative function that doesn't require the table, but it runs 50%
+dnl slower than this.
+
+include(`../config.m4')
+
+ASM_START()
+
+FLOAT64($C36,9223372036854775808.0) C 2^63
+
+PROLOGUE_GP(mpn_invert_limb)
+ lda r30,-16(r30)
+ addq r16,r16,r1
+ bne r1,$73
+ lda r0,-1
+ br r31,$Lend
+$73:
+ srl r16,1,r1
+ stq r1,0(r30)
+ ldt f11,0(r30)
+ cvtqt f11,f1
+ lda r1,$C36
+ ldt f10,0(r1)
+ divt f10,f1,f10
+ lda r2,$invtab-4096
+ srl r16,52,r1
+ addq r1,r1,r1
+ addq r1,r2,r1
+ bic r1,6,r2
+ ldq r2,0(r2)
+ bic r1,1,r1
+ extwl r2,r1,r2
+ sll r2,48,r0
+ umulh r16,r0,r1
+ addq r16,r1,r3
+ stq r3,0(r30)
+ ldt f11,0(r30)
+ cvtqt f11,f1
+ mult f1,f10,f1
+ cvttqc f1,f1
+ stt f1,0(r30)
+ ldq r4,0(r30)
+ subq r0,r4,r0
+ umulh r16,r0,r1
+ mulq r16,r0,r2
+ addq r16,r1,r3
+ bge r3,$Loop2
+$Loop1: addq r2,r16,r2
+ cmpult r2,r16,r1
+ addq r3,r1,r3
+ addq r0,1,r0
+ blt r3,$Loop1
+$Loop2: cmpult r2,r16,r1
+ subq r0,1,r0
+ subq r3,r1,r3
+ subq r2,r16,r2
+ bge r3,$Loop2
+$Lend:
+ lda r30,16(r30)
+ ret r31,(r26),1
+EPILOGUE(mpn_invert_limb)
+DATASTART(`$invtab',4)
+ .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
+ .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
+ .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
+ .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
+ .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
+ .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
+ .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
+ .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
+ .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
+ .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
+ .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
+ .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
+ .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
+ .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
+ .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
+ .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
+ .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
+ .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
+ .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
+ .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
+ .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
+ .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
+ .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
+ .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
+ .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
+ .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
+ .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
+ .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
+ .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
+ .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
+ .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
+ .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
+ .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
+ .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
+ .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
+ .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
+ .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
+ .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
+ .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
+ .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
+ .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
+ .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
+ .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
+ .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
+ .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
+ .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
+ .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
+ .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
+ .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
+ .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
+ .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
+ .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
+ .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
+ .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
+ .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
+ .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
+ .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
+ .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
+ .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
+ .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
+ .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
+ .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
+ .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
+ .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
+ .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
+ .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
+ .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
+ .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
+ .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
+ .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
+ .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
+ .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
+ .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
+ .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
+ .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
+ .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
+ .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
+ .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
+ .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
+ .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
+ .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
+ .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
+ .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
+ .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
+ .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
+ .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
+ .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
+ .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
+ .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
+ .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
+ .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
+ .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
+ .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
+ .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
+ .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
+ .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
+ .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
+ .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
+ .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
+ .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
+ .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
+ .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
+ .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
+ .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
+ .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
+ .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
+ .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
+ .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
+ .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
+ .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
+ .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
+ .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
+ .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
+ .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
+ .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
+ .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
+ .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
+ .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
+ .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
+ .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
+ .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
+ .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
+ .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
+ .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
+ .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
+ .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
+ .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
+ .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
+ .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
+ .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
+ .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
+ .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
+ .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
+ .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
+ .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
+ .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
+ .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
+ .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
+ .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
+ .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
+ .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
+ .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
+ .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
+ .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
+ .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
+ .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
+ .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
+ .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
+ .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
+ .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
+ .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
+ .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
+ .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
+ .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
+ .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
+ .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
+ .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
+ .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
+ .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
+ .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
+ .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
+ .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
+ .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
+ .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
+ .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
+ .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
+ .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
+ .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
+ .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
+ .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
+ .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
+ .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
+ .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
+ .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
+ .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
+ .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
+ .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
+ .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
+ .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
+ .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
+ .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
+ .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
+ .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
+ .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
+ .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
+ .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
+ .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
+ .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
+ .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
+ .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
+ .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
+ .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
+ .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
+ .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
+ .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
+ .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
+ .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
+ .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
+ .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
+ .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
+ .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
+ .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
+ .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
+ .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
+ .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
+ .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
+ .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
+ .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
+ .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
+ .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
+ .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
+ .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
+ .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
+ .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
+ .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
+ .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
+ .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
+ .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
+ .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
+ .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
+ .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
+ .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
+ .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
+ .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
+ .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
+ .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
+ .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
+ .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
+ .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
+ .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
+ .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
+ .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
+ .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
+ .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
+ .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
+ .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
+ .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
+ .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
+ .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
+ .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
+ .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
+ .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
+ .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
+ .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
+ .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
+ .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
+ .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
+ .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
+ .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
+ .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
+ .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
+ .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
+ .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
+ .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
+ .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
+ .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
+DATAEND()
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/lshift.asm b/rts/gmp/mpn/alpha/lshift.asm
new file mode 100644
index 0000000000..87c46f6fe7
--- /dev/null
+++ b/rts/gmp/mpn/alpha/lshift.asm
@@ -0,0 +1,104 @@
+dnl Alpha mpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+dnl it would take 4 cycles/limb. It should be possible to get down to 3
+dnl cycles/limb since both ldq and stq can be paired with the other used
+dnl instructions. But there are many restrictions in the 21064 pipeline that
+dnl makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+dnl 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+dnl 2. Only aligned instruction pairs can be paired.
+dnl 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ s8addq r18,r17,r17 C make r17 point at end of s1
+ ldq r4,-8(r17) C load first limb
+ subq r17,8,r17
+ subq r31,r19,r7
+ s8addq r18,r16,r16 C make r16 point at end of RES
+ subq r18,1,r18
+ and r18,4-1,r20 C number of limbs in first loop
+ srl r4,r7,r0 C compute function result
+
+ beq r20,$L0
+ subq r18,r20,r18
+
+ ALIGN(8)
+$Loop0:
+ ldq r3,-8(r17)
+ subq r16,8,r16
+ subq r17,8,r17
+ subq r20,1,r20
+ sll r4,r19,r5
+ srl r3,r7,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,0(r16)
+ bne r20,$Loop0
+
+$L0: beq r18,$Lend
+
+ ALIGN(8)
+$Loop: ldq r3,-8(r17)
+ subq r16,32,r16
+ subq r18,4,r18
+ sll r4,r19,r5
+ srl r3,r7,r6
+
+ ldq r4,-16(r17)
+ sll r3,r19,r1
+ bis r5,r6,r8
+ stq r8,24(r16)
+ srl r4,r7,r2
+
+ ldq r3,-24(r17)
+ sll r4,r19,r5
+ bis r1,r2,r8
+ stq r8,16(r16)
+ srl r3,r7,r6
+
+ ldq r4,-32(r17)
+ sll r3,r19,r1
+ bis r5,r6,r8
+ stq r8,8(r16)
+ srl r4,r7,r2
+
+ subq r17,32,r17
+ bis r1,r2,r8
+ stq r8,0(r16)
+
+ bgt r18,$Loop
+
+$Lend: sll r4,r19,r8
+ stq r8,-8(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/mul_1.asm b/rts/gmp/mpn/alpha/mul_1.asm
new file mode 100644
index 0000000000..46b8df34f5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/mul_1.asm
@@ -0,0 +1,71 @@
+dnl Alpha __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ ldq r2,0(r17) C r2 = s1_limb
+ subq r18,1,r18 C size--
+ mulq r2,r19,r3 C r3 = prod_low
+ bic r31,r31,r4 C clear cy_limb
+ umulh r2,r19,r0 C r0 = prod_high
+ beq r18,$Lend1 C jump if size was == 1
+ ldq r2,8(r17) C r2 = s1_limb
+ subq r18,1,r18 C size--
+ stq r3,0(r16)
+ beq r18,$Lend2 C jump if size was == 2
+
+ ALIGN(8)
+$Loop: mulq r2,r19,r3 C r3 = prod_low
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ subq r18,1,r18 C size--
+ umulh r2,r19,r4 C r4 = cy_limb
+ ldq r2,16(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ stq r3,8(r16)
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ addq r16,8,r16 C res_ptr++
+ bne r18,$Loop
+
+$Lend2: mulq r2,r19,r3 C r3 = prod_low
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ umulh r2,r19,r4 C r4 = cy_limb
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ stq r3,8(r16)
+ addq r4,r0,r0 C cy_limb = prod_high + cy
+ ret r31,(r26),1
+$Lend1: stq r3,0(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_mul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/rshift.asm b/rts/gmp/mpn/alpha/rshift.asm
new file mode 100644
index 0000000000..aa25eda54e
--- /dev/null
+++ b/rts/gmp/mpn/alpha/rshift.asm
@@ -0,0 +1,102 @@
+dnl Alpha mpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+dnl it would take 4 cycles/limb. It should be possible to get down to 3
+dnl cycles/limb since both ldq and stq can be paired with the other used
+dnl instructions. But there are many restrictions in the 21064 pipeline that
+dnl makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+dnl 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+dnl 2. Only aligned instruction pairs can be paired.
+dnl 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ldq r4,0(r17) C load first limb
+ addq r17,8,r17
+ subq r31,r19,r7
+ subq r18,1,r18
+ and r18,4-1,r20 C number of limbs in first loop
+ sll r4,r7,r0 C compute function result
+
+ beq r20,$L0
+ subq r18,r20,r18
+
+ ALIGN(8)
+$Loop0:
+ ldq r3,0(r17)
+ addq r16,8,r16
+ addq r17,8,r17
+ subq r20,1,r20
+ srl r4,r19,r5
+ sll r3,r7,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,-8(r16)
+ bne r20,$Loop0
+
+$L0: beq r18,$Lend
+
+ ALIGN(8)
+$Loop: ldq r3,0(r17)
+ addq r16,32,r16
+ subq r18,4,r18
+ srl r4,r19,r5
+ sll r3,r7,r6
+
+ ldq r4,8(r17)
+ srl r3,r19,r1
+ bis r5,r6,r8
+ stq r8,-32(r16)
+ sll r4,r7,r2
+
+ ldq r3,16(r17)
+ srl r4,r19,r5
+ bis r1,r2,r8
+ stq r8,-24(r16)
+ sll r3,r7,r6
+
+ ldq r4,24(r17)
+ srl r3,r19,r1
+ bis r5,r6,r8
+ stq r8,-16(r16)
+ sll r4,r7,r2
+
+ addq r17,32,r17
+ bis r1,r2,r8
+ stq r8,-8(r16)
+
+ bgt r18,$Loop
+
+$Lend: srl r4,r19,r8
+ stq r8,0(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/sub_n.asm b/rts/gmp/mpn/alpha/sub_n.asm
new file mode 100644
index 0000000000..718f657141
--- /dev/null
+++ b/rts/gmp/mpn/alpha/sub_n.asm
@@ -0,0 +1,114 @@
+dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ ldq r3,0(r17)
+ ldq r4,0(r18)
+
+ subq r19,1,r19
+ and r19,4-1,r2 C number of limbs in first loop
+ bis r31,r31,r0
+ beq r2,$L0 C if multiple of 4 limbs, skip first loop
+
+ subq r19,r2,r19
+
+$Loop0: subq r2,1,r2
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ addq r17,8,r17
+ addq r18,8,r18
+ bis r5,r5,r3
+ bis r6,r6,r4
+ addq r16,8,r16
+ bne r2,$Loop0
+
+$L0: beq r19,$Lend
+
+ ALIGN(8)
+$Loop: subq r19,4,r19
+
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ ldq r3,16(r17)
+ addq r6,r0,r6
+ ldq r4,16(r18)
+ cmpult r6,r0,r1
+ subq r5,r6,r6
+ cmpult r5,r6,r0
+ stq r6,8(r16)
+ bis r0,r1,r0
+
+ ldq r5,24(r17)
+ addq r4,r0,r4
+ ldq r6,24(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,16(r16)
+ bis r0,r1,r0
+
+ ldq r3,32(r17)
+ addq r6,r0,r6
+ ldq r4,32(r18)
+ cmpult r6,r0,r1
+ subq r5,r6,r6
+ cmpult r5,r6,r0
+ stq r6,24(r16)
+ bis r0,r1,r0
+
+ addq r17,32,r17
+ addq r18,32,r18
+ addq r16,32,r16
+ bne r19,$Loop
+
+$Lend: addq r4,r0,r4
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/submul_1.asm b/rts/gmp/mpn/alpha/submul_1.asm
new file mode 100644
index 0000000000..caec1a720b
--- /dev/null
+++ b/rts/gmp/mpn/alpha/submul_1.asm
@@ -0,0 +1,87 @@
+dnl Alpha __gmpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ umulh r2,r19,r0 C r0 = prod_high
+ beq r18,$Lend1 C jump if size was == 1
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ subq r5,r3,r3
+ cmpult r5,r3,r4
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ beq r18,$Lend2 C jump if size was == 2
+
+ ALIGN(8)
+$Loop: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ subq r18,1,r18 C size--
+ umulh r2,r19,r4 C r4 = cy_limb
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ subq r5,r3,r3
+ cmpult r5,r3,r5
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ addq r5,r0,r0 C combine carries
+ bne r18,$Loop
+
+$Lend2: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ umulh r2,r19,r4 C r4 = cy_limb
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ subq r5,r3,r3
+ cmpult r5,r3,r5
+ stq r3,0(r16)
+ addq r5,r0,r0 C combine carries
+ addq r4,r0,r0 C cy_limb = prod_high + cy
+ ret r31,(r26),1
+$Lend1: subq r5,r3,r3
+ cmpult r5,r3,r5
+ stq r3,0(r16)
+ addq r0,r5,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_submul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/udiv_qrnnd.S b/rts/gmp/mpn/alpha/udiv_qrnnd.S
new file mode 100644
index 0000000000..53814bbcb0
--- /dev/null
+++ b/rts/gmp/mpn/alpha/udiv_qrnnd.S
@@ -0,0 +1,151 @@
+ # Alpha 21064 __udiv_qrnnd
+
+ # Copyright (C) 1992, 1994, 1995, 1997, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __gmpn_udiv_qrnnd
+ .ent __gmpn_udiv_qrnnd
+__gmpn_udiv_qrnnd:
+ .frame $30,0,$26,0
+ .prologue 0
+#define cnt $2
+#define tmp $3
+#define rem_ptr $16
+#define n1 $17
+#define n0 $18
+#define d $19
+#define qb $20
+
+ ldiq cnt,16
+ blt d,.Largedivisor
+
+.Loop1: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop1
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.Largedivisor:
+ and n0,1,$4
+
+ srl n0,1,n0
+ sll n1,63,tmp
+ or tmp,n0,n0
+ srl n1,1,n1
+
+ and d,1,$6
+ srl d,1,$5
+ addq $5,$6,$5
+
+.Loop2: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop2
+
+ addq n1,n1,n1
+ addq $4,n1,n1
+ bne $6,.LOdd
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.LOdd:
+ /* q' in n0. r' in n1 */
+ addq n1,n0,n1
+ cmpult n1,n0,tmp # tmp := carry from addq
+ beq tmp,.LLp6
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp6: cmpult n1,d,tmp
+ bne tmp,.LLp7
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp7:
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+ .end __gmpn_udiv_qrnnd
diff --git a/rts/gmp/mpn/alpha/umul.asm b/rts/gmp/mpn/alpha/umul.asm
new file mode 100644
index 0000000000..44428ed5f5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/umul.asm
@@ -0,0 +1,39 @@
+dnl Currently unused.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+ .set noreorder
+ .set volatile
+ .set noat
+
+.text
+ .align 3
+ .globl __umul_ppmm
+ .ent __umul_ppmm
+__umul_ppmm:
+__umul_ppmm..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ mulq $17,$18,$1
+ umulh $17,$18,$0
+ stq $1,0($16)
+ ret $31,($26),1
+ .end __umul_ppmm
diff --git a/rts/gmp/mpn/alpha/unicos.m4 b/rts/gmp/mpn/alpha/unicos.m4
new file mode 100644
index 0000000000..7ff26c090c
--- /dev/null
+++ b/rts/gmp/mpn/alpha/unicos.m4
@@ -0,0 +1,63 @@
+divert(-1)
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+define(`ASM_START',
+ `.ident dummy')
+
+define(`X',`^X$1')
+define(`FLOAT64',
+ `dnl
+ .psect $1@crud,data
+$1: .t_floating $2
+ .endp')
+
+define(`PROLOGUE',
+ `dnl
+ .stack 192 ; What does this mean? Only Cray knows.
+ .psect $1@code,code,cache
+$1::')
+define(`PROLOGUE_GP', `PROLOGUE($1)')
+
+define(`EPILOGUE',
+ `dnl
+ .endp')
+
+define(`DATASTART',
+ `dnl
+ .psect $1@crud,data
+$1:')
+define(`DATAEND',
+ `dnl
+ .endp')
+
+define(`ASM_END',
+ `dnl
+ .end')
+
+define(`unop',`bis r31,r31,r31') ; Unicos assembler lacks unop
+define(`cvttqc',`cvttq/c')
+
+define(`ALIGN',`') ; Unicos assembler seems to align using garbage
+
+divert
+
diff --git a/rts/gmp/mpn/arm/add_n.S b/rts/gmp/mpn/arm/add_n.S
new file mode 100644
index 0000000000..fb3f8f703b
--- /dev/null
+++ b/rts/gmp/mpn/arm/add_n.S
@@ -0,0 +1,77 @@
+@ ARM mpn_add -- Add two limb vectors of the same length > 0 and store sum in
+@ a third limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define s r0
+#define a r1
+#define b r2
+#define n r3
+
+#define sl r10
+#define fp r11
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_add_n
+ .type __gmpn_add_n,%function
+__gmpn_add_n:
+ stmfd sp!, { r8, r9, lr }
+ movs n, n, lsr #1
+ bcc skip1
+ ldr ip, [a], #4
+ ldr lr, [b], #4
+ adds ip, ip, lr
+ str ip, [s], #4
+skip1:
+ tst n, #1
+ beq skip2
+ ldmia a!, { r8, r9 }
+ ldmia b!, { ip, lr }
+ adcs r8, r8, ip
+ adcs r9, r9, lr
+ stmia s!, { r8, r9 }
+skip2:
+ bics n, n, #1
+ beq return
+ stmfd sp!, { r4, r5, r6, r7 }
+add_n_loop:
+ ldmia a!, { r4, r5, r6, r7 }
+ ldmia b!, { r8, r9, ip, lr }
+ adcs r4, r4, r8
+ ldr r8, [s] /* Bring stuff into cache. */
+ adcs r5, r5, r9
+ adcs r6, r6, ip
+ adcs r7, r7, lr
+ stmia s!, { r4, r5, r6, r7 }
+ sub n, n, #2
+ teq n, #0
+ bne add_n_loop
+ ldmfd sp!, { r4, r5, r6, r7 }
+return:
+ adc r0, n, #0
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_add_n, end - __gmpn_add_n
diff --git a/rts/gmp/mpn/arm/addmul_1.S b/rts/gmp/mpn/arm/addmul_1.S
new file mode 100644
index 0000000000..396fff77a3
--- /dev/null
+++ b/rts/gmp/mpn/arm/addmul_1.S
@@ -0,0 +1,89 @@
+@ ARM mpn_mul_1 -- Multiply a limb vector with a limb and add the result to a
+@ second limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define p r0
+#define a r1
+#define n r2
+#define w r3
+
+#define z r11
+
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_addmul_1
+ .type __gmpn_addmul_1,%function
+__gmpn_addmul_1:
+ stmfd sp!, { r8-r11, lr }
+ mov z, #0
+ mov ip, #0
+ movs n, n, lsr #1
+ bcc skip1
+ ldr lr, [a], #4
+ ldr r9, [p]
+ umlal r9, ip, w, lr
+ str r9, [p], #4
+skip1:
+ movs n, n, lsr #1
+ bcc skip2
+ ldmia p, { r9, r10 }
+ adds r8, ip, r9
+ adc r9, z, #0
+ ldmia a!, { ip, lr }
+ umlal r8, r9, w, ip
+ adds r9, r9, r10
+ adc ip, z, #0
+ umlal r9, ip, w, lr
+ stmia p!, { r8, r9 }
+skip2:
+ teq n, #0
+ beq return
+ stmfd sp!, { r4-r7 }
+addmul_loop:
+ ldmia p, { r5, r6, r7, r8 }
+ adds r4, ip, r5
+ adc r5, z, #0
+ ldmia a!, { r9, r10, ip, lr }
+ umlal r4, r5, w, r9
+ adds r5, r5, r6
+ adc r6, z, #0
+ umlal r5, r6, w, r10
+ adds r6, r6, r7
+ adc r7, z, #0
+ umlal r6, r7, w, ip
+ adds r7, r7, r8
+ adc ip, z, #0
+ umlal r7, ip, w, lr
+ subs n, n, #1
+ stmia p!, { r4, r5, r6, r7 }
+ bne addmul_loop
+ ldmfd sp!, { r4-r7 }
+return:
+ mov r0, ip
+ ldmfd sp!, { r8-r11, pc }
+end:
+ .size __gmpn_addmul_1, end - __gmpn_addmul_1
diff --git a/rts/gmp/mpn/arm/gmp-mparam.h b/rts/gmp/mpn/arm/gmp-mparam.h
new file mode 100644
index 0000000000..a35b0c7b66
--- /dev/null
+++ b/rts/gmp/mpn/arm/gmp-mparam.h
@@ -0,0 +1,34 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 21
+#endif
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 48
+#endif
diff --git a/rts/gmp/mpn/arm/mul_1.S b/rts/gmp/mpn/arm/mul_1.S
new file mode 100644
index 0000000000..bae526a0f0
--- /dev/null
+++ b/rts/gmp/mpn/arm/mul_1.S
@@ -0,0 +1,81 @@
+@ ARM mpn_addmul_1 -- Multiply a limb vector with a limb and store the result
+@ in a second limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define p r0
+#define a r1
+#define n r2
+#define w r3
+
+#define sl r10
+#define fp r11
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_mul_1
+ .type __gmpn_mul_1,%function
+__gmpn_mul_1:
+ stmfd sp!, { r8, r9, lr }
+ ands ip, n, #1
+ beq skip1
+ ldr lr, [a], #4
+ umull r9, ip, w, lr
+ str r9, [p], #4
+skip1:
+ tst n, #2
+ beq skip2
+ mov r8, ip
+ ldmia a!, { ip, lr }
+ mov r9, #0
+ umlal r8, r9, w, ip
+ mov ip, #0
+ umlal r9, ip, w, lr
+ stmia p!, { r8, r9 }
+skip2:
+ bics n, n, #3
+ beq return
+ stmfd sp!, { r6, r7 }
+mul_1_loop:
+ mov r6, ip
+ ldmia a!, { r8, r9, ip, lr }
+ ldr r7, [p] /* Bring stuff into cache. */
+ mov r7, #0
+ umlal r6, r7, w, r8
+ mov r8, #0
+ umlal r7, r8, w, r9
+ mov r9, #0
+ umlal r8, r9, w, ip
+ mov ip, #0
+ umlal r9, ip, w, lr
+ subs n, n, #4
+ stmia p!, { r6, r7, r8, r9 }
+ bne mul_1_loop
+ ldmfd sp!, { r6, r7 }
+return:
+ mov r0, ip
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_mul_1, end - __gmpn_mul_1
diff --git a/rts/gmp/mpn/arm/sub_n.S b/rts/gmp/mpn/arm/sub_n.S
new file mode 100644
index 0000000000..856505fe21
--- /dev/null
+++ b/rts/gmp/mpn/arm/sub_n.S
@@ -0,0 +1,79 @@
+@ ARM mpn_sub -- Subtract two limb vectors of the same length > 0 and store
+@ difference in a third limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define d r0
+#define a r1
+#define b r2
+#define n r3
+
+#define sl r10
+#define fp r11
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_sub_n
+ .type __gmpn_sub_n,%function
+__gmpn_sub_n:
+ stmfd sp!, { r8, r9, lr }
+ subs ip, ip, ip
+ tst n, #1
+ beq skip1
+ ldr ip, [a], #4
+ ldr lr, [b], #4
+ subs ip, ip, lr
+ str ip, [d], #4
+skip1:
+ tst n, #2
+ beq skip2
+ ldmia a!, { r8, r9 }
+ ldmia b!, { ip, lr }
+ sbcs r8, r8, ip
+ sbcs r9, r9, lr
+ stmia d!, { r8, r9 }
+skip2:
+ bics n, n, #3
+ beq return
+ stmfd sp!, { r4, r5, r6, r7 }
+sub_n_loop:
+ ldmia a!, { r4, r5, r6, r7 }
+ ldmia b!, { r8, r9, ip, lr }
+ sbcs r4, r4, r8
+ ldr r8, [d] /* Bring stuff into cache. */
+ sbcs r5, r5, r9
+ sbcs r6, r6, ip
+ sbcs r7, r7, lr
+ stmia d!, { r4, r5, r6, r7 }
+ sub n, n, #4
+ teq n, #0
+ bne sub_n_loop
+ ldmfd sp!, { r4, r5, r6, r7 }
+return:
+ sbc r0, r0, r0
+ and r0, r0, #1
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_sub_n, end - __gmpn_sub_n
diff --git a/rts/gmp/mpn/asm-defs.m4 b/rts/gmp/mpn/asm-defs.m4
new file mode 100644
index 0000000000..aa2024138b
--- /dev/null
+++ b/rts/gmp/mpn/asm-defs.m4
@@ -0,0 +1,1182 @@
+divert(-1)
+dnl
+dnl m4 macros for gmp assembly code, shared by all CPUs.
+dnl
+dnl These macros are designed for use with any m4 and have been used on
+dnl GNU, FreeBSD, OpenBSD and SysV.
+dnl
+dnl GNU m4 and OpenBSD 2.7 m4 will give filenames and line numbers in error
+dnl messages.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Macros:
+dnl
+dnl Most new m4 specific macros have an "m4_" prefix to emphasise they're
+dnl m4 expansions. But new defining things like deflit() and defreg() are
+dnl named like the builtin define(), and forloop() is named following the
+dnl GNU m4 example on which it's based.
+dnl
+dnl GNU m4 with the -P option uses "m4_" as a prefix for builtins, but that
+dnl option isn't going to be used, so there's no conflict or confusion.
+dnl
+dnl
+dnl Comments in output:
+dnl
+dnl The m4 comment delimiters are left at # and \n, the normal assembler
+dnl commenting for most CPUs. m4 passes comment text through without
+dnl expanding macros in it, which is generally a good thing since it stops
+dnl unexpected expansions and possible resultant errors.
+dnl
+dnl But note that when a quoted string is being read, a # isn't special, so
+dnl apostrophes in comments in quoted strings must be avoided or they'll be
+dnl interpreted as a closing quote mark. But when the quoted text is
+dnl re-read # will still act like a normal comment, supressing macro
+dnl expansion.
+dnl
+dnl For example,
+dnl
+dnl # apostrophes in comments that're outside quotes are ok
+dnl # and using macro names like PROLOGUE is ok too
+dnl ...
+dnl ifdef(`PIC',`
+dnl # but apostrophes aren't ok inside quotes
+dnl # ^--wrong
+dnl ...
+dnl # though macro names like PROLOGUE are still ok
+dnl ...
+dnl ')
+dnl
+dnl If macro expansion in a comment is wanted, use `#' in the .asm (ie. a
+dnl quoted hash symbol), which will turn into # in the .s but get
+dnl expansions done on that line. This can make the .s more readable to
+dnl humans, but it won't make a blind bit of difference to the assembler.
+dnl
+dnl All the above applies, mutatis mutandis, when changecom() is used to
+dnl select @ ! ; or whatever other commenting.
+dnl
+dnl
+dnl Variations in m4 affecting gmp:
+dnl
+dnl $# - When a macro is called as "foo" with no brackets, BSD m4 sets $#
+dnl to 1, whereas GNU or SysV m4 set it to 0. In all cases though
+dnl "foo()" sets $# to 1. This is worked around in various places.
+dnl
+dnl len() - When "len()" is given an empty argument, BSD m4 evaluates to
+dnl nothing, whereas GNU, SysV, and the new OpenBSD, evaluate to 0.
+dnl See m4_length() below which works around this.
+dnl
+dnl translit() - GNU m4 accepts character ranges like A-Z, and the new
+dnl OpenBSD m4 does under option -g, but basic BSD and SysV don't.
+dnl
+dnl popdef() - in BSD and SysV m4 popdef() takes multiple arguments and
+dnl pops each, but GNU m4 only takes one argument.
+dnl
+dnl push back - BSD m4 has some limits on the amount of text that can be
+dnl pushed back. The limit is reasonably big and so long as macros
+dnl don't gratuitously duplicate big arguments it isn't a problem.
+dnl Normally an error message is given, but sometimes it just hangs.
+dnl
+dnl eval() &,|,^ - GNU and SysV m4 have bitwise operators &,|,^ available,
+dnl but BSD m4 doesn't (contrary to what the man page suggests) and
+dnl instead ^ is exponentiation.
+dnl
+dnl eval() ?: - The C ternary operator "?:" is available in BSD m4, but not
+dnl in SysV or GNU m4 (as of GNU m4 1.4 and betas of 1.5).
+dnl
+dnl eval() -2^31 - BSD m4 has a bug where an eval() resulting in -2^31
+dnl (ie. -2147483648) gives "-(". Using -2147483648 within an
+dnl expression is ok, it just can't be a final result. "-(" will of
+dnl course upset parsing, with all sorts of strange effects.
+dnl
+dnl eval() <<,>> - SysV m4 doesn't support shift operators in eval() (on
+dnl SunOS 5.7 /usr/xpg4/m4 has them but /usr/ccs/m4 doesn't). See
+dnl m4_lshift() and m4_rshift() below for workarounds.
+dnl
+dnl m4wrap() - in BSD m4, m4wrap() replaces any previous m4wrap() string,
+dnl in SysV m4 it appends to it, and in GNU m4 it prepends. See
+dnl m4wrap_prepend() below which brings uniformity to this.
+dnl
+dnl __file__,__line__ - GNU m4 and OpenBSD 2.7 m4 provide these, and
+dnl they're used here to make error messages more informative. GNU m4
+dnl gives an unhelpful "NONE 0" in an m4wrap(), but that's worked
+dnl around.
+dnl
+dnl __file__ quoting - OpenBSD m4, unlike GNU m4, doesn't quote the
+dnl filename in __file__, so care should be taken that no macro has
+dnl the same name as a file, or an unwanted expansion will occur when
+dnl printing an error or warning.
+dnl
+dnl OpenBSD 2.6 m4 - this m4 rejects decimal constants containing an 8 or 9
+dnl in eval(), making it pretty much unusable. This bug is confined
+dnl to version 2.6 (it's not in 2.5, and has been fixed in 2.7).
+dnl
+dnl SunOS /usr/bin/m4 - this m4 lacks a number of desired features,
+dnl including $# and $@, defn(), m4exit(), m4wrap(), pushdef(),
+dnl popdef(). /usr/5bin/m4 is a SysV style m4 which should always be
+dnl available, and "configure" will reject /usr/bin/m4 in favour of
+dnl /usr/5bin/m4 (if necessary).
+dnl
+dnl The sparc code actually has modest m4 requirements currently and
+dnl could manage with /usr/bin/m4, but there's no reason to put our
+dnl macros through contortions when /usr/5bin/m4 is available or GNU
+dnl m4 can be installed.
+
+
+ifdef(`__ASM_DEFS_M4_INCLUDED__',
+`m4_error(`asm-defs.m4 already included, dont include it twice
+')m4exit(1)')
+define(`__ASM_DEFS_M4_INCLUDED__')
+
+
+dnl Detect and give a message about the unsuitable OpenBSD 2.6 m4.
+
+ifelse(eval(89),89,,
+`errprint(
+`This m4 doesnt accept 8 and/or 9 in constants in eval(), making it unusable.
+This is probably OpenBSD 2.6 m4 (September 1999). Upgrade to OpenBSD 2.7,
+or get a bug fix from the CVS (expr.c rev 1.9), or get GNU m4. Dont forget
+to configure with M4=/wherever/m4 if you install one of these in a directory
+not in $PATH.
+')m4exit(1)')
+
+
+dnl Detect and give a message about the unsuitable SunOS /usr/bin/m4.
+dnl
+dnl Unfortunately this test doesn't work when m4 is run in the normal way
+dnl from mpn/Makefile with "m4 -DOPERATION_foo foo.asm", since the bad m4
+dnl takes "-" in "-D..." to mean read stdin, so it will look like it just
+dnl hangs. But running "m4 asm-defs.m4" to try it out will work.
+dnl
+dnl We'd like to abort immediately on finding a problem, but unfortunately
+dnl the bad m4 doesn't have an m4exit(), nor does an invalid eval() kill
+dnl it. Unexpanded $#'s in some m4_assert_numargs() later on will comment
+dnl out some closing parentheses and kill it with "m4: arg stack overflow".
+
+define(m4_dollarhash_works_test,``$#'')
+ifelse(m4_dollarhash_works_test(x),1,,
+`errprint(
+`This m4 doesnt support $# and cant be used for GMP asm processing.
+If this is on SunOS, ./configure should choose /usr/5bin/m4 if you have that
+or can get it, otherwise install GNU m4. Dont forget to configure with
+M4=/wherever/m4 if you install in a directory not in $PATH.
+')')
+undefine(`m4_dollarhash_works_test')
+
+
+dnl --------------------------------------------------------------------------
+dnl Basic error handling things.
+
+
+dnl Usage: m4_dollarhash_1_if_noparen_p
+dnl
+dnl Expand to 1 if a call "foo" gives $# set to 1 (as opposed to 0 like GNU
+dnl and SysV m4 give).
+
+define(m4_dollarhash_1_if_noparen_test,`$#')
+define(m4_dollarhash_1_if_noparen_p,
+eval(m4_dollarhash_1_if_noparen_test==1))
+undefine(`m4_dollarhash_1_if_noparen_test')
+
+
+dnl Usage: m4wrap_prepend(string)
+dnl
+dnl Prepend the given string to what will be exapanded under m4wrap at the
+dnl end of input.
+dnl
+dnl This macro exists to work around variations in m4wrap() behaviour in
+dnl the various m4s (notes at the start of this file). Don't use m4wrap()
+dnl directly since it will interfere with this scheme.
+
+define(m4wrap_prepend,
+m4_assert_numargs(1)
+`define(`m4wrap_string',`$1'defn(`m4wrap_string'))')
+
+m4wrap(`m4wrap_string')
+define(m4wrap_string,`')
+
+
+dnl Usage: m4_file_and_line
+dnl
+dnl Expand to the current file and line number, if the GNU m4 extensions
+dnl __file__ and __line__ are available.
+dnl
+dnl In GNU m4 1.4 at the end of input when m4wrap text is expanded,
+dnl __file__ is NONE and __line__ is 0, which is not a helpful thing to
+dnl print. If m4_file_seen() has been called to note the last file seen,
+dnl then that file at a big line number is used, otherwise "end of input"
+dnl is used (although "end of input" won't parse as an error message).
+
+define(m4_file_and_line,
+`ifdef(`__file__',
+`ifelse(__file__`'__line__,`NONE0',
+`ifdef(`m4_file_seen_last',`m4_file_seen_last: 999999: ',`end of input: ')',
+`__file__: __line__: ')')')
+
+
+dnl Usage: m4_errprint_commas(arg,...)
+dnl
+dnl The same as errprint(), but commas are printed between arguments
+dnl instead of spaces.
+
+define(m4_errprint_commas,
+`errprint(`$1')dnl
+ifelse(eval($#>1),1,`errprint(`,')m4_errprint_commas(shift($@))')')
+
+
+dnl Usage: m4_error(args...)
+dnl m4_warning(args...)
+dnl
+dnl Print an error message, using m4_errprint_commas, prefixed with the
+dnl current filename and line number (if available). m4_error sets up to
+dnl give an error exit at the end of processing, m4_warning just prints.
+dnl These macros are the recommended way to print errors.
+dnl
+dnl The arguments here should be quoted in the usual way to prevent them
+dnl being expanded when the macro call is read. (m4_error takes care not
+dnl to do any further expansion.)
+dnl
+dnl For example,
+dnl
+dnl m4_error(`some error message
+dnl ')
+dnl
+dnl which prints
+dnl
+dnl foo.asm:123: some error message
+dnl
+dnl or if __file__ and __line__ aren't available
+dnl
+dnl some error message
+dnl
+dnl The "file:line:" format is a basic style, used by gcc and GNU m4, so
+dnl emacs and other editors will recognise it in their normal error message
+dnl parsing.
+
+define(m4_warning,
+`m4_errprint_commas(m4_file_and_line`'$@)')
+
+define(m4_error,
+`define(`m4_error_occurred',1)m4_warning($@)')
+
+define(`m4_error_occurred',0)
+
+dnl This m4wrap_prepend() is first, so it'll be executed last.
+m4wrap_prepend(
+`ifelse(m4_error_occurred,1,
+`m4_error(`Errors occurred during m4 processing
+')m4exit(1)')')
+
+
+dnl Usage: m4_assert_numargs(num)
+dnl
+dnl Put this unquoted on a line on its own at the start of a macro
+dnl definition to add some code to check that num many arguments get passed
+dnl to the macro. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_numargs(2)
+dnl `something `$1' and `$2' blah blah')
+dnl
+dnl Then a call like foo(one,two,three) will provoke an error like
+dnl
+dnl file:10: foo expected 2 arguments, got 3 arguments
+dnl
+dnl Here are some calls and how many arguments they're interpreted as passing.
+dnl
+dnl foo(abc,def) 2
+dnl foo(xyz) 1
+dnl foo() 0
+dnl foo -1
+dnl
+dnl The -1 for no parentheses at all means a macro that's meant to be used
+dnl that way can be checked with m4_assert_numargs(-1). For example,
+dnl
+dnl define(SPECIAL_SUFFIX,
+dnl m4_assert_numargs(-1)
+dnl `ifdef(`FOO',`_foo',`_bar')')
+dnl
+dnl But as an alternative see also deflit() below where parenthesized
+dnl expressions following a macro are passed through to the output.
+dnl
+dnl Note that in BSD m4 there's no way to differentiate calls "foo" and
+dnl "foo()", so in BSD m4 the distinction between the two isn't enforced.
+dnl (In GNU and SysV m4 it can be checked, and is.)
+
+
+dnl m4_assert_numargs is able to check its own arguments by calling
+dnl assert_numargs_internal directly.
+dnl
+dnl m4_doublequote($`'0) expands to ``$0'', whereas ``$`'0'' would expand
+dnl to `$`'0' and do the wrong thing, and likewise for $1. The same is
+dnl done in other assert macros.
+dnl
+dnl $`#' leaves $# in the new macro being defined, and stops # being
+dnl interpreted as a comment character.
+dnl
+dnl `dnl ' means an explicit dnl isn't necessary when m4_assert_numargs is
+dnl used. The space means that if there is a dnl it'll still work.
+
+dnl Usage: m4_doublequote(x) expands to ``x''
+define(m4_doublequote,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))``$1''')
+
+define(m4_assert_numargs,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))dnl
+`m4_assert_numargs_internal'(m4_doublequote($`'0),$1,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl Called: m4_assert_numargs_internal(`macroname',wantargs,$#,len(`$1'))
+define(m4_assert_numargs_internal,
+`m4_assert_numargs_internal_check(`$1',`$2',m4_numargs_count(`$3',`$4'))')
+
+dnl Called: m4_assert_numargs_internal_check(`macroname',wantargs,gotargs)
+dnl
+dnl If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl should be -1. If wantargs is -1 but gotargs is 0 and the two can't be
+dnl distinguished then it's allowed to pass.
+dnl
+define(m4_assert_numargs_internal_check,
+`ifelse(eval($2 == $3
+ || ($2==-1 && $3==0 && m4_dollarhash_1_if_noparen_p)),0,
+`m4_error(`$1 expected 'm4_Narguments(`$2')`, got 'm4_Narguments(`$3')
+)')')
+
+dnl Called: m4_numargs_count($#,len(`$1'))
+dnl If $#==0 then -1 args, if $#==1 but len(`$1')==0 then 0 args, otherwise
+dnl $# args.
+define(m4_numargs_count,
+`ifelse($1,0, -1,
+`ifelse(eval($1==1 && $2-0==0),1, 0, $1)')')
+
+dnl Usage: m4_Narguments(N)
+dnl "$1 argument" or "$1 arguments" with the plural according to $1.
+define(m4_Narguments,
+`$1 argument`'ifelse(`$1',1,,s)')
+
+
+dnl --------------------------------------------------------------------------
+dnl Additional error checking things.
+
+
+dnl Usage: m4_file_seen()
+dnl
+dnl Record __file__ for the benefit of m4_file_and_line in m4wrap text.
+dnl The basic __file__ macro comes out quoted, like `foo.asm', and
+dnl m4_file_seen_last is defined like that too.
+dnl
+dnl This only needs to be used with something that could generate an error
+dnl message in m4wrap text. The x86 PROLOGUE is the only such at the
+dnl moment (at end of input its m4wrap checks for missing EPILOGUE). A few
+dnl include()s can easily trick this scheme, but you'd expect an EPILOGUE
+dnl in the same file as the PROLOGUE.
+
+define(m4_file_seen,
+m4_assert_numargs(0)
+`ifelse(__file__,`NONE',,
+`define(`m4_file_seen_last',m4_doublequote(__file__))')')
+
+
+dnl Usage: m4_assert_onearg()
+dnl
+dnl Put this, unquoted, at the start of a macro definition to add some code
+dnl to check that one argument is passed to the macro, but with that
+dnl argument allowed to be empty. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_onearg()
+dnl `blah blah $1 blah blah')
+dnl
+dnl Calls "foo(xyz)" or "foo()" are accepted. A call "foo(xyz,abc)" fails.
+dnl A call "foo" fails too, but BSD m4 can't detect this case (GNU and SysV
+dnl m4 can).
+
+define(m4_assert_onearg,
+m4_assert_numargs(0)
+`m4_assert_onearg_internal'(m4_doublequote($`'0),$`#')`dnl ')
+
+dnl Called: m4_assert_onearg(`macroname',$#)
+define(m4_assert_onearg_internal,
+`ifelse($2,1,,
+`m4_error(`$1 expected 1 argument, got 'm4_Narguments(`$2')
+)')')
+
+
+dnl Usage: m4_assert_numargs_range(low,high)
+dnl
+dnl Put this, unquoted, at the start of a macro definition to add some code
+dnl to check that between low and high many arguments get passed to the
+dnl macro. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_numargs_range(3,5)
+dnl `mandatory $1 $2 $3 optional $4 $5 end')
+dnl
+dnl See m4_assert_numargs() for more info.
+
+define(m4_assert_numargs_range,
+m4_assert_numargs(2)
+``m4_assert_numargs_range_internal'(m4_doublequote($`'0),$1,$2,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl Called: m4_assert_numargs_range_internal(`name',low,high,$#,len(`$1'))
+define(m4_assert_numargs_range_internal,
+m4_assert_numargs(5)
+`m4_assert_numargs_range_check(`$1',`$2',`$3',m4_numargs_count(`$4',`$5'))')
+
+dnl Called: m4_assert_numargs_range_check(`name',low,high,gotargs)
+dnl
+dnl If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl should be -1. To ensure a `high' of -1 works, a fudge is applied to
+dnl gotargs if it's 0 and the 0 and -1 cases can't be distinguished.
+dnl
+define(m4_assert_numargs_range_check,
+m4_assert_numargs(4)
+`ifelse(eval($2 <= $4 &&
+ ($4 - ($4==0 && m4_dollarhash_1_if_noparen_p) <= $3)),0,
+`m4_error(`$1 expected $2 to $3 arguments, got 'm4_Narguments(`$4')
+)')')
+
+
+dnl Usage: m4_assert_defined(symbol)
+dnl
+dnl Put this unquoted on a line of its own at the start of a macro
+dnl definition to add some code to check that the given symbol is defined
+dnl when the macro is used. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_defined(`FOO_PREFIX')
+dnl `FOO_PREFIX whatever')
+dnl
+dnl This is a convenient way to check that the user or ./configure or
+dnl whatever has defined the things needed by a macro, as opposed to
+dnl silently generating garbage.
+
+define(m4_assert_defined,
+m4_assert_numargs(1)
+``m4_assert_defined_internal'(m4_doublequote($`'0),``$1'')`dnl '')
+
+dnl Called: m4_assert_defined_internal(`macroname',`define_required')
+define(m4_assert_defined_internal,
+m4_assert_numargs(2)
+`ifdef(`$2',,
+`m4_error(`$1 needs $2 defined
+')')')
+
+
+dnl Usage: m4_not_for_expansion(`SYMBOL')
+dnl define_not_for_expansion(`SYMBOL')
+dnl
+dnl m4_not_for_expansion turns SYMBOL, if defined, into something which
+dnl will give an error if expanded. For example,
+dnl
+dnl m4_not_for_expansion(`PIC')
+dnl
+dnl define_not_for_expansion is the same, but always makes a definition.
+dnl
+dnl These are for symbols that should be tested with ifdef(`FOO',...)
+dnl rather than be expanded as such. They guard against accidentally
+dnl omitting the quotes, as in ifdef(FOO,...). Note though that they only
+dnl catches this when FOO is defined, so be sure to test code both with and
+dnl without each definition.
+
+define(m4_not_for_expansion,
+m4_assert_numargs(1)
+`ifdef(`$1',`define_not_for_expansion(`$1')')')
+
+define(define_not_for_expansion,
+m4_assert_numargs(1)
+`ifelse(defn(`$1'),,,
+`m4_error(``$1' has a non-empty value, maybe it shouldnt be munged with m4_not_for_expansion()
+')')dnl
+define(`$1',`m4_not_for_expansion_internal(`$1')')')
+
+define(m4_not_for_expansion_internal,
+`m4_error(``$1' is not meant to be expanded, perhaps you mean `ifdef(`$1',...)'
+')')
+
+
+dnl --------------------------------------------------------------------------
+dnl Various generic m4 things.
+
+
+dnl Usage: m4_ifdef_anyof_p(`symbol',...)
+dnl
+dnl Expand to 1 if any of the symbols in the argument list are defined, or
+dnl to 0 if not.
+
+define(m4_ifdef_anyof_p,
+`ifelse(eval($#<=1 && m4_length(`$1')==0),1, 0,
+`ifdef(`$1', 1,
+`m4_ifdef_anyof_p(shift($@))')')')
+
+
+dnl Usage: m4_length(string)
+dnl
+dnl Determine the length of a string. This is the same as len(), but
+dnl always expands to a number, working around the BSD len() which
+dnl evaluates to nothing given an empty argument.
+
+define(m4_length,
+m4_assert_onearg()
+`eval(len(`$1')-0)')
+
+
+dnl Usage: m4_stringequal_p(x,y)
+dnl
+dnl Expand to 1 or 0 according as strings x and y are equal or not.
+
+define(m4_stringequal_p,
+`ifelse(`$1',`$2',1,0)')
+
+
+dnl Usage: m4_incr_or_decr(n,last)
+dnl
+dnl Do an incr(n) or decr(n), whichever is in the direction of "last".
+dnl Both n and last must be numbers of course.
+
+define(m4_incr_or_decr,
+m4_assert_numargs(2)
+`ifelse(eval($1<$2),1,incr($1),decr($1))')
+
+
+dnl Usage: forloop(i, first, last, statement)
+dnl
+dnl Based on GNU m4 examples/forloop.m4, but extended.
+dnl
+dnl statement is expanded repeatedly, with i successively defined as
+dnl
+dnl first, first+1, ..., last-1, last
+dnl
+dnl Or if first > last, then it's
+dnl
+dnl first, first-1, ..., last+1, last
+dnl
+dnl If first == last, then one expansion is done.
+dnl
+dnl A pushdef/popdef of i is done to preserve any previous definition (or
+dnl lack of definition). first and last are eval()ed and so can be
+dnl expressions.
+dnl
+dnl forloop_first is defined to 1 on the first iteration, 0 on the rest.
+dnl forloop_last is defined to 1 on the last iteration, 0 on the others.
+dnl Nested forloops are allowed, in which case forloop_first and
+dnl forloop_last apply to the innermost loop that's open.
+dnl
+dnl A simple example,
+dnl
+dnl forloop(i, 1, 2*2+1, `dnl
+dnl iteration number i ... ifelse(forloop_first,1,FIRST)
+dnl ')
+
+
+dnl "i" and "statement" are carefully quoted, but "first" and "last" are
+dnl just plain numbers once eval()ed.
+
+define(`forloop',
+m4_assert_numargs(4)
+`pushdef(`$1',eval(`$2'))dnl
+pushdef(`forloop_first',1)dnl
+pushdef(`forloop_last',0)dnl
+forloop_internal(`$1',eval(`$3'),`$4')`'dnl
+popdef(`forloop_first')dnl
+popdef(`forloop_last')dnl
+popdef(`$1')')
+
+dnl Called: forloop_internal(`var',last,statement)
+define(`forloop_internal',
+m4_assert_numargs(3)
+`ifelse($1,$2,
+`define(`forloop_last',1)$3',
+`$3`'dnl
+define(`forloop_first',0)dnl
+define(`$1',m4_incr_or_decr($1,$2))dnl
+forloop_internal(`$1',$2,`$3')')')
+
+
+dnl Usage: m4_toupper(x)
+dnl m4_tolower(x)
+dnl
+dnl Convert the argument string to upper or lower case, respectively.
+dnl Only one argument accepted.
+dnl
+dnl BSD m4 doesn't take ranges like a-z in translit(), so the full alphabet
+dnl is written out.
+
+define(m4_alphabet_lower, `abcdefghijklmnopqrstuvwxyz')
+define(m4_alphabet_upper, `ABCDEFGHIJKLMNOPQRSTUVWXYZ')
+
+define(m4_toupper,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_lower, m4_alphabet_upper)')
+
+define(m4_tolower,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_upper, m4_alphabet_lower)')
+
+
+dnl Usage: m4_empty_if_zero(x)
+dnl
+dnl Evaluate to x, or to nothing if x is 0. x is eval()ed and so can be an
+dnl expression.
+dnl
+dnl This is useful for x86 addressing mode displacements since forms like
+dnl (%ebx) are one byte shorter than 0(%ebx). A macro `foo' for use as
+dnl foo(%ebx) could be defined with the following so it'll be empty if the
+dnl expression comes out zero.
+dnl
+dnl deflit(`foo', `m4_empty_if_zero(a+b*4-c)')
+dnl
+dnl Naturally this shouldn't be done if, say, a computed jump depends on
+dnl the code being a particular size.
+
+define(m4_empty_if_zero,
+m4_assert_onearg()
+`ifelse(eval($1),0,,eval($1))')
+
+
+dnl Usage: m4_log2(x)
+dnl
+dnl Calculate a logarithm to base 2.
+dnl x must be an integral power of 2, between 2**0 and 2**30.
+dnl x is eval()ed, so it can be an expression.
+dnl An error results if x is invalid.
+dnl
+dnl 2**31 isn't supported, because an unsigned 2147483648 is out of range
+dnl of a 32-bit signed int. Also, the bug in BSD m4 where an eval()
+dnl resulting in 2147483648 (or -2147483648 as the case may be) gives `-('
+dnl means tests like eval(1<<31==(x)) would be necessary, but that then
+dnl gives an unattractive explosion of eval() error messages if x isn't
+dnl numeric.
+
+define(m4_log2,
+m4_assert_numargs(1)
+`m4_log2_internal(0,1,eval(`$1'))')
+
+dnl Called: m4_log2_internal(n,2**n,target)
+define(m4_log2_internal,
+m4_assert_numargs(3)
+`ifelse($2,$3,$1,
+`ifelse($1,30,
+`m4_error(`m4_log2() argument too big or not a power of two: $3
+')',
+`m4_log2_internal(incr($1),eval(2*$2),$3)')')')
+
+
+dnl Usage: m4_div2_towards_zero
+dnl
+dnl m4 division is probably whatever a C signed division is, and C doesn't
+dnl specify what rounding gets used on negatives, so this expression forces
+dnl a rounding towards zero.
+
+define(m4_div2_towards_zero,
+m4_assert_numargs(1)
+`eval((($1) + ((($1)<0) & ($1))) / 2)')
+
+
+dnl Usage: m4_lshift(n,count)
+dnl m4_rshift(n,count)
+dnl
+dnl Calculate n shifted left or right by count many bits. Both n and count
+dnl are eval()ed and so can be expressions.
+dnl
+dnl Negative counts are allowed and mean a shift in the opposite direction.
+dnl Negative n is allowed and right shifts will be arithmetic (meaning
+dnl divide by 2**count, rounding towards zero, also meaning the sign bit is
+dnl duplicated).
+dnl
+dnl Use these macros instead of << and >> in eval() since the basic ccs
+dnl SysV m4 doesn't have those operators.
+
+define(m4_rshift,
+m4_assert_numargs(2)
+`m4_lshift(`$1',-(`$2'))')
+
+define(m4_lshift,
+m4_assert_numargs(2)
+`m4_lshift_internal(eval(`$1'),eval(`$2'))')
+
+define(m4_lshift_internal,
+m4_assert_numargs(2)
+`ifelse(eval($2-0==0),1,$1,
+`ifelse(eval($2>0),1,
+`m4_lshift_internal(eval($1*2),decr($2))',
+`m4_lshift_internal(m4_div2_towards_zero($1),incr($2))')')')
+
+
+dnl Usage: deflit(name,value)
+dnl
+dnl Like define(), but "name" expands like a literal, rather than taking
+dnl arguments. For example "name(%eax)" expands to "value(%eax)".
+dnl
+dnl Limitations:
+dnl
+dnl $ characters in the value part must have quotes to stop them looking
+dnl like macro parameters. For example, deflit(reg,`123+$`'4+567'). See
+dnl defreg() below for handling simple register definitions like $7 etc.
+dnl
+dnl "name()" is turned into "name", unfortunately. In GNU and SysV m4 an
+dnl error is generated when this happens, but in BSD m4 it will happen
+dnl silently. The problem is that in BSD m4 $# is 1 in both "name" or
+dnl "name()", so there's no way to differentiate them. Because we want
+dnl plain "name" to turn into plain "value", we end up with "name()"
+dnl turning into plain "value" too.
+dnl
+dnl "name(foo)" will lose any whitespace after commas in "foo", for example
+dnl "disp(%eax, %ecx)" would become "128(%eax,%ecx)".
+dnl
+dnl These parentheses oddities shouldn't matter in assembler text, but if
+dnl they do the suggested workaround is to write "name ()" or "name (foo)"
+dnl to stop the parentheses looking like a macro argument list. If a space
+dnl isn't acceptable in the output, then write "name`'()" or "name`'(foo)".
+dnl The `' is stripped when read, but again stops the parentheses looking
+dnl like parameters.
+
+dnl Quoting for deflit_emptyargcheck is similar to m4_assert_numargs. The
+dnl stuff in the ifelse gives a $#, $1 and $@ evaluated in the new macro
+dnl created, not in deflit.
+define(deflit,
+m4_assert_numargs(2)
+`define(`$1',
+`deflit_emptyargcheck'(``$1'',$`#',m4_doublequote($`'1))`dnl
+$2`'dnl
+ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')
+
+dnl Called: deflit_emptyargcheck(macroname,$#,`$1')
+define(deflit_emptyargcheck,
+`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)
+')')')
+
+
+dnl Usage: m4_assert(`expr')
+dnl
+dnl Test a compile-time requirement with an m4 expression. The expression
+dnl should be quoted, and will be eval()ed and expected to be non-zero.
+dnl For example,
+dnl
+dnl m4_assert(`FOO*2+6 < 14')
+
+define(m4_assert,
+m4_assert_numargs(1)
+`ifelse(eval($1),1,,
+`m4_error(`assertion failed: $1
+')')')
+
+
+dnl --------------------------------------------------------------------------
+dnl Various assembler things, not specific to any particular CPU.
+dnl
+
+
+dnl Usage: include_mpn(`filename')
+dnl
+dnl Like include(), but adds a path to the mpn source directory. For
+dnl example,
+dnl
+dnl include_mpn(`sparc64/addmul_1h.asm')
+
+define(include_mpn,
+m4_assert_numargs(1)
+m4_assert_defined(`CONFIG_TOP_SRCDIR')
+`include(CONFIG_TOP_SRCDIR`/mpn/$1')')
+
+
+dnl Usage: C comment ...
+dnl
+dnl "C" works like a FORTRAN-style comment character. This can be used for
+dnl comments to the right of assembly instructions, where just dnl would
+dnl remove the linefeed, and concatenate adjacent lines.
+dnl
+dnl "C" and/or "dnl" are useful when an assembler doesn't support comments,
+dnl or where different assemblers for a particular CPU have different
+dnl comment styles. The intermediate ".s" files will end up with no
+dnl comments, just code.
+dnl
+dnl Using "C" is not intended to cause offence to anyone who doesn't like
+dnl FORTRAN; but if that happens it's an unexpected bonus.
+
+define(C, `
+dnl')
+
+
+dnl Various possible defines passed from the Makefile that are to be tested
+dnl with ifdef() rather than be expanded.
+
+m4_not_for_expansion(`PIC')
+
+dnl aors_n
+m4_not_for_expansion(`OPERATION_add_n')
+m4_not_for_expansion(`OPERATION_sub_n')
+
+dnl aorsmul_n
+m4_not_for_expansion(`OPERATION_addmul_1')
+m4_not_for_expansion(`OPERATION_submul_1')
+
+dnl logops_n
+m4_not_for_expansion(`OPERATION_and_n')
+m4_not_for_expansion(`OPERATION_andn_n')
+m4_not_for_expansion(`OPERATION_nand_n')
+m4_not_for_expansion(`OPERATION_ior_n')
+m4_not_for_expansion(`OPERATION_iorn_n')
+m4_not_for_expansion(`OPERATION_nior_n')
+m4_not_for_expansion(`OPERATION_xor_n')
+m4_not_for_expansion(`OPERATION_xnor_n')
+
+dnl popham
+m4_not_for_expansion(`OPERATION_popcount')
+m4_not_for_expansion(`OPERATION_hamdist')
+
+
+dnl Usage: m4_config_gmp_mparam(`symbol')
+dnl
+dnl Check that `symbol' is defined. If it isn't, issue an error and
+dnl terminate immediately. The error message explains that the symbol
+dnl should be in config.m4, copied from gmp-mparam.h.
+dnl
+dnl Processing is terminated immediately since missing something like
+dnl KARATSUBA_SQR_THRESHOLD can lead to infinite loops with endless error
+dnl messages.
+
+define(m4_config_gmp_mparam,
+m4_assert_numargs(1)
+`ifdef(`$1',,
+`m4_error(`$1 is not defined.
+ "configure" should have extracted this from gmp-mparam.h and put it
+ in config.m4, but somehow this has failed.
+')m4exit(1)')')
+
+
+dnl Usage: defreg(name,reg)
+dnl
+dnl Give a name to a $ style register. For example,
+dnl
+dnl defreg(foo,$12)
+dnl
+dnl defreg() inserts an extra pair of quotes after the $ so that it's not
+dnl interpreted as an m4 macro parameter, ie. foo is actually $`'12. m4
+dnl strips those quotes when foo is expanded.
+dnl
+dnl deflit() is used to make the new definition, so it will expand
+dnl literally even if followed by parentheses ie. foo(99) will become
+dnl $12(99). (But there's nowhere that would be used is there?)
+dnl
+dnl When making further definitions from existing defreg() macros, remember
+dnl to use defreg() again to protect the $ in the new definitions too. For
+dnl example,
+dnl
+dnl defreg(a0,$4)
+dnl defreg(a1,$5)
+dnl ...
+dnl
+dnl defreg(PARAM_DST,a0)
+dnl
+dnl This is only because a0 is expanding at the time the PARAM_DST
+dnl definition is made, leaving a literal $4 that must be re-quoted. On
+dnl the other hand in something like the following ra is only expanded when
+dnl ret is used and its $`'31 protection will have its desired effect at
+dnl that time.
+dnl
+dnl defreg(ra,$31)
+dnl ...
+dnl define(ret,`j ra')
+dnl
+dnl Note that only $n forms are meant to be used here, and something like
+dnl 128($30) doesn't get protected and will come out wrong.
+
+define(defreg,
+m4_assert_numargs(2)
+`deflit(`$1',
+substr(`$2',0,1)``''substr(`$2',1))')
+
+
+dnl Usage: m4_instruction_wrapper(num)
+dnl
+dnl Put this, unquoted, on a line on its own, at the start of a macro
+dnl that's a wrapper around an assembler instruction. It adds code to give
+dnl a descriptive error message if the macro is invoked without arguments.
+dnl
+dnl For example, suppose jmp needs to be wrapped,
+dnl
+dnl define(jmp,
+dnl m4_instruction_wrapper()
+dnl m4_assert_numargs(1)
+dnl `.byte 0x42
+dnl .long $1
+dnl nop')
+dnl
+dnl The point of m4_instruction_wrapper is to get a better error message
+dnl than m4_assert_numargs would give if jmp is accidentally used as plain
+dnl "jmp foo" instead of the intended "jmp( foo)". "jmp()" with no
+dnl argument also provokes the error message.
+dnl
+dnl m4_instruction_wrapper should only be used with wrapped instructions
+dnl that take arguments, since obviously something meant to be used as
+dnl plain "ret", say, doesn't want to give an error when used that way.
+
+define(m4_instruction_wrapper,
+m4_assert_numargs(0)
+``m4_instruction_wrapper_internal'(m4_doublequote($`'0),dnl
+m4_doublequote(ifdef(`__file__',__file__,`the m4 sources')),dnl
+$`#',m4_doublequote($`'1))`dnl'')
+
+dnl Called: m4_instruction_wrapper_internal($0,`filename',$#,$1)
+define(m4_instruction_wrapper_internal,
+`ifelse(eval($3<=1 && m4_length(`$4')==0),1,
+`m4_error(`$1 is a macro replacing that instruction and needs arguments, see $2 for details
+')')')
+
+
+dnl Usage: UNROLL_LOG2, UNROLL_MASK, UNROLL_BYTES
+dnl CHUNK_LOG2, CHUNK_MASK, CHUNK_BYTES
+dnl
+dnl When code supports a variable amount of loop unrolling, the convention
+dnl is to define UNROLL_COUNT to the number of limbs processed per loop.
+dnl When testing code this can be varied to see how much the loop overhead
+dnl is costing. For example,
+dnl
+dnl deflit(UNROLL_COUNT, 32)
+dnl
+dnl If the forloop() generating the unrolled loop has a pattern processing
+dnl more than one limb, the convention is to express this with CHUNK_COUNT.
+dnl For example,
+dnl
+dnl deflit(CHUNK_COUNT, 2)
+dnl
+dnl The LOG2, MASK and BYTES definitions below are derived from these COUNT
+dnl definitions. If COUNT is redefined, the LOG2, MASK and BYTES follow
+dnl the new definition automatically.
+dnl
+dnl LOG2 is the log base 2 of COUNT. MASK is COUNT-1, which can be used as
+dnl a bit mask. BYTES is BYTES_PER_MP_LIMB*COUNT, the number of bytes
+dnl processed in each unrolled loop.
+dnl
+dnl BYTES_PER_MP_LIMB is defined in a CPU specific m4 include file. It
+dnl exists only so the BYTES definitions here can be common to all CPUs.
+dnl In the actual code for a given CPU, an explicit 4 or 8 may as well be
+dnl used because the code is only for a particular CPU, it doesn't need to
+dnl be general.
+dnl
+dnl Note that none of these macros do anything except give conventional
+dnl names to commonly used things. You still have to write your own
+dnl expressions for a forloop() and the resulting address displacements.
+dnl Something like the following would be typical for 4 bytes per limb.
+dnl
+dnl forloop(`i',0,UNROLL_COUNT-1,`
+dnl deflit(`disp',eval(i*4))
+dnl ...
+dnl ')
+dnl
+dnl Or when using CHUNK_COUNT,
+dnl
+dnl forloop(`i',0,UNROLL_COUNT/CHUNK_COUNT-1,`
+dnl deflit(`disp0',eval(i*CHUNK_COUNT*4))
+dnl deflit(`disp1',eval(disp0+4))
+dnl ...
+dnl ')
+dnl
+dnl Clearly `i' can be run starting from 1, or from high to low or whatever
+dnl best suits.
+
+deflit(UNROLL_LOG2,
+m4_assert_defined(`UNROLL_COUNT')
+`m4_log2(UNROLL_COUNT)')
+
+deflit(UNROLL_MASK,
+m4_assert_defined(`UNROLL_COUNT')
+`eval(UNROLL_COUNT-1)')
+
+deflit(UNROLL_BYTES,
+m4_assert_defined(`UNROLL_COUNT')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(UNROLL_COUNT * BYTES_PER_MP_LIMB)')
+
+deflit(CHUNK_LOG2,
+m4_assert_defined(`CHUNK_COUNT')
+`m4_log2(CHUNK_COUNT)')
+
+deflit(CHUNK_MASK,
+m4_assert_defined(`CHUNK_COUNT')
+`eval(CHUNK_COUNT-1)')
+
+deflit(CHUNK_BYTES,
+m4_assert_defined(`CHUNK_COUNT')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(CHUNK_COUNT * BYTES_PER_MP_LIMB)')
+
+
+dnl Usage: MPN(name)
+dnl
+dnl Add MPN_PREFIX to a name.
+dnl MPN_PREFIX defaults to "__gmpn_" if not defined.
+
+ifdef(`MPN_PREFIX',,
+`define(`MPN_PREFIX',`__gmpn_')')
+
+define(MPN,
+m4_assert_numargs(1)
+`MPN_PREFIX`'$1')
+
+
+dnl Usage: mpn_add_n, etc
+dnl
+dnl Convenience definitions using MPN(), like the #defines in gmp.h. Each
+dnl function that might be implemented in assembler is here.
+
+define(define_mpn,
+m4_assert_numargs(1)
+`define(`mpn_$1',`MPN(`$1')')')
+
+define_mpn(add)
+define_mpn(add_1)
+define_mpn(add_n)
+define_mpn(add_nc)
+define_mpn(addmul_1)
+define_mpn(addmul_1c)
+define_mpn(addsub_n)
+define_mpn(addsub_nc)
+define_mpn(and_n)
+define_mpn(andn_n)
+define_mpn(bdivmod)
+define_mpn(cmp)
+define_mpn(com_n)
+define_mpn(copyd)
+define_mpn(copyi)
+define_mpn(divexact_by3c)
+define_mpn(divrem)
+define_mpn(divrem_1)
+define_mpn(divrem_1c)
+define_mpn(divrem_2)
+define_mpn(divrem_classic)
+define_mpn(divrem_newton)
+define_mpn(dump)
+define_mpn(gcd)
+define_mpn(gcd_1)
+define_mpn(gcdext)
+define_mpn(get_str)
+define_mpn(hamdist)
+define_mpn(invert_limb)
+define_mpn(ior_n)
+define_mpn(iorn_n)
+define_mpn(kara_mul_n)
+define_mpn(kara_sqr_n)
+define_mpn(lshift)
+define_mpn(lshiftc)
+define_mpn(mod_1)
+define_mpn(mod_1c)
+define_mpn(mul)
+define_mpn(mul_1)
+define_mpn(mul_1c)
+define_mpn(mul_basecase)
+define_mpn(mul_n)
+define_mpn(perfect_square_p)
+define_mpn(popcount)
+define_mpn(preinv_mod_1)
+define_mpn(nand_n)
+define_mpn(nior_n)
+define_mpn(random)
+define_mpn(random2)
+define_mpn(rshift)
+define_mpn(rshiftc)
+define_mpn(scan0)
+define_mpn(scan1)
+define_mpn(set_str)
+define_mpn(sqr_basecase)
+define_mpn(sub_n)
+define_mpn(sqrtrem)
+define_mpn(sub)
+define_mpn(sub_1)
+define_mpn(sub_n)
+define_mpn(sub_nc)
+define_mpn(submul_1)
+define_mpn(submul_1c)
+define_mpn(toom3_mul_n)
+define_mpn(toom3_sqr_n)
+define_mpn(umul_ppmm)
+define_mpn(udiv_qrnnd)
+define_mpn(xnor_n)
+define_mpn(xor_n)
+
+define(`ASM_START',
+ `')
+
+define(`PROLOGUE',
+ `
+ TEXT
+ ALIGN(4)
+ GLOBL GSYM_PREFIX`$1'
+ TYPE(GSYM_PREFIX`$1',`function')
+GSYM_PREFIX`$1':')
+
+define(`EPILOGUE',
+ `
+ SIZE(GSYM_PREFIX`$1',.-GSYM_PREFIX`$1')')
+
+dnl LSYM_PREFIX might be L$, so defn() must be used to quote it or the L
+dnl will expand as the L macro, an infinite recursion.
+define(`L',`defn(`LSYM_PREFIX')$1')
+
+define(`INT32',
+ `
+ ALIGN(4)
+$1:
+ W32 $2
+ ')
+
+define(`INT64',
+ `
+ ALIGN(8)
+$1:
+ W32 $2
+ W32 $3
+ ')
+
+
+dnl Usage: ALIGN(bytes)
+dnl
+dnl Emit a ".align" directive. The alignment is specified in bytes, and
+dnl will normally need to be a power of 2. The actual ".align" generated
+dnl is either bytes or logarithmic according to what ./configure detects.
+dnl
+dnl ALIGN_FILL_0x90, if defined and equal to "yes", means a ", 0x90" should
+dnl be appended (this is for x86).
+
+define(ALIGN,
+m4_assert_numargs(1)
+m4_assert_defined(`ALIGN_LOGARITHMIC')
+`.align ifelse(ALIGN_LOGARITHMIC,yes,`m4_log2($1)',`eval($1)')dnl
+ifelse(ALIGN_FILL_0x90,yes,`, 0x90')')
+
+
+dnl Usage: MULFUNC_PROLOGUE(function function...)
+dnl
+dnl A dummy macro which is grepped for by ./configure to know what
+dnl functions a multi-function file is providing. Use this if there aren't
+dnl explicit PROLOGUE()s for each possible function.
+dnl
+dnl Multiple MULFUNC_PROLOGUEs can be used, or just one with the function
+dnl names separated by spaces.
+
+define(`MULFUNC_PROLOGUE',
+m4_assert_numargs(1)
+`')
+
+
+divert`'dnl
diff --git a/rts/gmp/mpn/clipper/add_n.s b/rts/gmp/mpn/clipper/add_n.s
new file mode 100644
index 0000000000..538a1caed0
--- /dev/null
+++ b/rts/gmp/mpn/clipper/add_n.s
@@ -0,0 +1,48 @@
+; Clipper __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+.text
+ .align 16
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ subq $8,sp
+ storw r6,(sp)
+ loadw 12(sp),r2
+ loadw 16(sp),r3
+ loadq $0,r6 ; clear carry-save register
+
+.Loop: loadw (r1),r4
+ loadw (r2),r5
+ addwc r6,r6 ; restore carry from r6
+ addwc r5,r4
+ storw r4,(r0)
+ subwc r6,r6 ; save carry in r6
+ addq $4,r0
+ addq $4,r1
+ addq $4,r2
+ subq $1,r3
+ brne .Loop
+
+ negw r6,r0
+ loadw (sp),r6
+ addq $8,sp
+ ret sp
diff --git a/rts/gmp/mpn/clipper/mul_1.s b/rts/gmp/mpn/clipper/mul_1.s
new file mode 100644
index 0000000000..c0c756488c
--- /dev/null
+++ b/rts/gmp/mpn/clipper/mul_1.s
@@ -0,0 +1,47 @@
+; Clipper __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+.text
+ .align 16
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ subq $8,sp
+ storw r6,(sp)
+ loadw 12(sp),r2
+ loadw 16(sp),r3
+ loadq $0,r6 ; clear carry limb
+
+.Loop: loadw (r1),r4
+ mulwux r3,r4
+ addw r6,r4 ; add old carry limb into low product limb
+ loadq $0,r6
+ addwc r5,r6 ; propagate cy into high product limb
+ storw r4,(r0)
+ addq $4,r0
+ addq $4,r1
+ subq $1,r2
+ brne .Loop
+
+ movw r6,r0
+ loadw 0(sp),r6
+ addq $8,sp
+ ret sp
diff --git a/rts/gmp/mpn/clipper/sub_n.s b/rts/gmp/mpn/clipper/sub_n.s
new file mode 100644
index 0000000000..44d8797289
--- /dev/null
+++ b/rts/gmp/mpn/clipper/sub_n.s
@@ -0,0 +1,48 @@
+; Clipper __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+.text
+ .align 16
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ subq $8,sp
+ storw r6,(sp)
+ loadw 12(sp),r2
+ loadw 16(sp),r3
+ loadq $0,r6 ; clear carry-save register
+
+.Loop: loadw (r1),r4
+ loadw (r2),r5
+ addwc r6,r6 ; restore carry from r6
+ subwc r5,r4
+ storw r4,(r0)
+ subwc r6,r6 ; save carry in r6
+ addq $4,r0
+ addq $4,r1
+ addq $4,r2
+ subq $1,r3
+ brne .Loop
+
+ negw r6,r0
+ loadw (sp),r6
+ addq $8,sp
+ ret sp
diff --git a/rts/gmp/mpn/cray/README b/rts/gmp/mpn/cray/README
new file mode 100644
index 0000000000..8195c67e21
--- /dev/null
+++ b/rts/gmp/mpn/cray/README
@@ -0,0 +1,14 @@
+The (poorly optimized) code in this directory was originally written for a
+j90 system, but finished on a c90. It should work on all Cray vector
+computers. For the T3E and T3D systems, the `alpha' subdirectory at the
+same level as the directory containing this file, is much better.
+
+* `+' seems to be faster than `|' when combining carries.
+
+* It is possible that the best multiply performance would be achived by
+ storing only 24 bits per element, and using lazy carry propagation. Before
+ calling i24mult, full carry propagation would be needed.
+
+* Supply tasking versions of the C loops.
+
+
diff --git a/rts/gmp/mpn/cray/add_n.c b/rts/gmp/mpn/cray/add_n.c
new file mode 100644
index 0000000000..1fdb394993
--- /dev/null
+++ b/rts/gmp/mpn/cray/add_n.c
@@ -0,0 +1,96 @@
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
+ For Cray vector processors.
+
+ Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_add_n (c, a, b, n)
+ mp_ptr c;
+ mp_srcptr a, b;
+ mp_size_t n;
+{
+ mp_size_t i;
+ mp_size_t nm1 = n - 1;
+ int more_carries = 0;
+ int carry_out;
+
+ /* For small operands the non-vector code is faster. */
+ if (n < 16)
+ goto sequential;
+
+ if (a == c || b == c)
+ {
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ if (c == a)
+ {
+ /* allocate temp space for a */
+ mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ MPN_COPY (ax, a, n);
+ a = (mp_srcptr) ax;
+ }
+ if (c == b)
+ {
+ /* allocate temp space for b */
+ mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ MPN_COPY (bx, b, n);
+ b = (mp_srcptr) bx;
+ }
+ carry_out = mpn_add_n (c, a, b, n);
+ TMP_FREE (marker);
+ return carry_out;
+ }
+
+ carry_out = a[nm1] + b[nm1] < a[nm1];
+
+#pragma _CRI ivdep /* Cray PVP systems */
+ for (i = nm1; i > 0; i--)
+ {
+ int cy_in;
+ cy_in = a[i - 1] + b[i - 1] < a[i - 1];
+ c[i] = a[i] + b[i] + cy_in;
+ more_carries += c[i] < cy_in;
+ }
+ c[0] = a[0] + b[0];
+
+ if (more_carries)
+ {
+ /* This won't vectorize, but we should come here rarely. */
+ int cy;
+ sequential:
+ cy = 0;
+ for (i = 0; i < n; i++)
+ {
+ mp_limb_t ai, ci, t;
+ ai = a[i];
+ t = b[i] + cy;
+ cy = t < cy;
+ ci = ai + t;
+ cy += ci < ai;
+ c[i] = ci;
+ }
+ carry_out = cy;
+ }
+
+ return carry_out;
+}
diff --git a/rts/gmp/mpn/cray/addmul_1.c b/rts/gmp/mpn/cray/addmul_1.c
new file mode 100644
index 0000000000..031b4e8e8d
--- /dev/null
+++ b/rts/gmp/mpn/cray/addmul_1.c
@@ -0,0 +1,46 @@
+/* mpn_addmul_1 for Cray PVP.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+ mp_ptr p0, p1, tp;
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+
+ GMPN_MULWW (p1, p0, up, &n, &limb);
+ cy_limb = mpn_add_n (tp, rp, p0, n);
+ rp[0] = tp[0];
+ cy_limb += mpn_add_n (rp + 1, tp + 1, p1, n - 1);
+ cy_limb += p1[n - 1];
+
+ TMP_FREE (marker);
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/cray/gmp-mparam.h b/rts/gmp/mpn/cray/gmp-mparam.h
new file mode 100644
index 0000000000..14f7b8e05b
--- /dev/null
+++ b/rts/gmp/mpn/cray/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 64
+#define BITS_PER_SHORTINT 32
+#define BITS_PER_CHAR 8
diff --git a/rts/gmp/mpn/cray/mul_1.c b/rts/gmp/mpn/cray/mul_1.c
new file mode 100644
index 0000000000..0c8750b4ac
--- /dev/null
+++ b/rts/gmp/mpn/cray/mul_1.c
@@ -0,0 +1,44 @@
+/* mpn_mul_1 for Cray PVP.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+ mp_ptr p0, p1;
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+
+ GMPN_MULWW (p1, p0, up, &n, &limb);
+ rp[0] = p0[0];
+ cy_limb = mpn_add_n (rp + 1, p0 + 1, p1, n - 1);
+ cy_limb += p1[n - 1];
+
+ TMP_FREE (marker);
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/cray/mulww.f b/rts/gmp/mpn/cray/mulww.f
new file mode 100644
index 0000000000..99507c1e44
--- /dev/null
+++ b/rts/gmp/mpn/cray/mulww.f
@@ -0,0 +1,54 @@
+c Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+c Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+c This file is part of the GNU MP Library.
+
+c The GNU MP Library is free software; you can redistribute it and/or
+c modify it under the terms of the GNU Lesser General Public License as
+c published by the Free Software Foundation; either version 2.1 of the
+c License, or (at your option) any later version.
+
+c The GNU MP Library is distributed in the hope that it will be useful,
+c but WITHOUT ANY WARRANTY; without even the implied warranty of
+c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+c Lesser General Public License for more details.
+
+c You should have received a copy of the GNU Lesser General Public
+c License along with the GNU MP Library; see the file COPYING.LIB. If
+c not, write to the Free Software Foundation, Inc., 59 Temple Place -
+c Suite 330, Boston, MA 02111-1307, USA.
+
+c p1[] = hi(a[]*s); the upper limbs of each product
+c p0[] = low(a[]*s); the corresponding lower limbs
+c n is number of limbs in the vectors
+
+ subroutine gmpn_mulww(p1,p0,a,n,s)
+ integer*8 p1(0:*),p0(0:*),a(0:*),s
+ integer n
+
+ integer*8 a0,a1,a2,s0,s1,s2,c
+ integer*8 ai,t0,t1,t2,t3,t4
+
+ s0 = shiftl(and(s,4194303),24)
+ s1 = shiftl(and(shiftr(s,22),4194303),24)
+ s2 = shiftl(and(shiftr(s,44),4194303),24)
+
+ do i = 0,n-1
+ ai = a(i)
+ a0 = shiftl(and(ai,4194303),24)
+ a1 = shiftl(and(shiftr(ai,22),4194303),24)
+ a2 = shiftl(and(shiftr(ai,44),4194303),24)
+
+ t0 = i24mult(a0,s0)
+ t1 = i24mult(a0,s1)+i24mult(a1,s0)
+ t2 = i24mult(a0,s2)+i24mult(a1,s1)+i24mult(a2,s0)
+ t3 = i24mult(a1,s2)+i24mult(a2,s1)
+ t4 = i24mult(a2,s2)
+
+ p0(i)=shiftl(t2,44)+shiftl(t1,22)+t0
+ c=shiftr(shiftr(t0,22)+and(t1,4398046511103)+
+ $ shiftl(and(t2,1048575),22),42)
+ p1(i)=shiftl(t4,24)+shiftl(t3,2)+shiftr(t2,20)+shiftr(t1,42)+c
+ end do
+ end
diff --git a/rts/gmp/mpn/cray/mulww.s b/rts/gmp/mpn/cray/mulww.s
new file mode 100644
index 0000000000..890cdcf94d
--- /dev/null
+++ b/rts/gmp/mpn/cray/mulww.s
@@ -0,0 +1,245 @@
+* Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+* Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+* This file is generated from mulww.f in this same directory.
+
+* This file is part of the GNU MP Library.
+
+* The GNU MP Library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 2.1 of the
+* License, or (at your option) any later version.
+
+* The GNU MP Library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+
+* You should have received a copy of the GNU Lesser General Public
+* License along with the GNU MP Library; see the file COPYING.LIB. If
+* not, write to the Free Software Foundation, Inc., 59 Temple Place -
+* Suite 330, Boston, MA 02111-1307, USA.
+
+ IDENT GMPN_MULWW
+**********************************************
+* Assemble with Cal Version 2.0 *
+* *
+* Generated by CFT77 6.0.4.19 *
+* on 06/27/00 at 04:34:13 *
+* *
+**********************************************
+* ALLOW UNDERSCORES IN IDENTIFIERS
+ EDIT OFF
+ FORMAT NEW
+@DATA SECTION DATA,CM
+@DATA = W.*
+ CON O'0000000000040000000000
+ CON O'0435152404713723252514 ;GMPN_MUL 1
+ CON O'0535270000000000000000 ;WW 1
+ CON O'0000000000000001200012 ;trbk tbl 1
+ VWD 32/0,32/P.GMPN_MULWW ;trbk tbl 1
+ CON O'0014003000000000001416 ;trbk tbl 1
+ CON O'0000000000000000000011 ;trbk tbl 1
+ CON O'0000000000000000000215 ;trbk tbl 1
+ BSSZ 1 ;trbk tbl 1
+@CODE SECTION CODE
+@CODE = P.*
+L3 = P.* ; 1
+ A0 A6 ;arg base 1
+ A5 6 ;num Darg 1
+ B03,A5 0,A0 ;load DAs 1
+ A0 A1+A2 ; 1
+ A5 1 ;num Ts 1
+ 0,A0 T00,A5 ; 1
+ B02 A2 ;new base 1
+ B66 A3 ;stk top 1
+ B01 A6 ;arg base 1
+ A7 P.L4 ;ofrn rtn 1
+ B00 A7 ;return 1
+ A6 @DATA ; 1
+ J $STKOFEN ;$STKOFEN 1
+GMPN_MULWW = P.* ; 1
+ A0 @DATA+3 ;(trbk) 1
+ B77 A0 ;(trbk) 1
+ A1 13 ;num Bs 1
+ A0 B66 ;stk top 1
+ A2 B66 ;stk tmp 1
+ A4 B67 ;stk limt 1
+ 0,A0 B77,A1 ; 1
+ A7 782 ;stk size 1
+ A3 A2+A7 ; 1
+ A0 A4-A3 ; 1
+ JAM L3 ;overflow 1
+ A0 A6 ;arg base 1
+ A5 6 ;num Darg 1
+ B03,A5 0,A0 ;load DAs 1
+ A0 A1+A2 ; 1
+ A5 1 ;num Ts 1
+ 0,A0 T00,A5 ; 1
+ B02 A2 ;new base 1
+ B66 A3 ;new top 1
+ B01 A6 ;arg base 1
+L4 = P.* ;ofrn rtn 1
+ A7 B07 ;regs 14
+ S7 0,A7 ; 14
+ A6 B10 ;regs 9
+ S6 0,A6 ; 9
+ S5 1 ; 14
+ S4 <22 ; 9
+ S7 S7-S5 ; 14
+ S5 #S7 ; 14
+ T00 S6 ;regs 10
+ S6 S6>22 ; 10
+ S7 T00 ;regs 11
+ S7 S7>44 ; 11
+ S3 T00 ;regs 9
+ S3 S3&S4 ; 9
+ S6 S6&S4 ; 10
+ S7 S7&S4 ; 11
+ S3 S3<24 ; 9
+ S6 S6<24 ; 10
+ S7 S7<24 ; 11
+ S0 S5 ;regs 14
+ S4 S5 ;regs 14
+ S1 S6 ;regs 14
+ S2 S3 ;regs 14
+ S3 S7 ;regs 14
+ JSP L5 ; 14
+L6 = P.* ; 14
+ S7 -S4 ; 14
+ A2 S7 ;regs 14
+ VL A2 ;regs 14
+ A3 B06 ;s_bt_sp 14
+ A5 B05 ;s_bt_sp 14
+ A4 B04 ;s_bt_sp 14
+ A1 VL ; 14
+ A2 S4 ;regs 14
+L7 = P.* ; 14
+ A0 A3 ;regs 15
+ VL A1 ;regs 15
+ V7 ,A0,1 ; 15
+ B11 A5 ;s_bt_sp 15
+ A7 22 ; 17
+ B12 A4 ;s_bt_sp 17
+ V6 V7>A7 ; 17
+ B13 A3 ;s_bt_sp 17
+ S7 <22 ; 17
+ A3 B02 ;s_bt_sp 17
+ V5 S7&V6 ; 17
+ A6 24 ; 17
+ V4 V5<A6 ; 17
+ V3 S1*FV4 ; 22
+ V2 S7&V7 ; 16
+ V1 V2<A6 ; 16
+ V0 S3*FV1 ; 22
+ V6 V0+V3 ; 22
+ A5 44 ; 18
+ V5 V7>A5 ; 18
+ V2 S1*FV1 ; 21
+ V3 S7&V5 ; 18
+ A0 14 ; 34
+ B77 A0 ;regs 34
+ A4 B77 ;regs 34
+ A0 A4+A3 ; 34
+ ,A0,1 V2 ;v_ld_str 34
+ V0 V3<A6 ; 18
+ V7 S2*FV1 ; 20
+ A4 142 ; 34
+ A0 A4+A3 ; 34
+ ,A0,1 V7 ;v_ld_str 34
+ V5 V7>A7 ; 28
+ V2 S2*FV0 ; 22
+ V3 V6+V2 ; 22
+ S7 <20 ; 28
+ V1 S7&V3 ; 28
+ A4 270 ; 34
+ A0 A4+A3 ; 34
+ ,A0,1 V0 ;v_ld_str 34
+ A4 14 ; 34
+ A0 A4+A3 ; 34
+ V7 ,A0,1 ;v_ld_str 34
+ V6 V1<A7 ; 28
+ V2 S2*FV4 ; 21
+ V0 V7+V2 ; 21
+ S7 <42 ; 28
+ V1 S7&V0 ; 28
+ A4 398 ; 34
+ A0 A4+A3 ; 34
+ ,A0,1 V0 ;v_ld_str 34
+ V7 S3*FV4 ; 23
+ V2 V5+V1 ; 28
+ V0 V3<A5 ; 26
+ A5 526 ; 34
+ A0 A5+A3 ; 34
+ ,A0,1 V0 ;v_ld_str 34
+ A5 270 ; 34
+ A0 A5+A3 ; 34
+ V4 ,A0,1 ;v_ld_str 34
+ V5 V2+V6 ; 28
+ A5 20 ; 32
+ V1 V3>A5 ; 32
+ V0 S1*FV4 ; 23
+ A5 654 ; 34
+ A0 A5+A3 ; 34
+ ,A0,1 V1 ;v_ld_str 34
+ V6 V7+V0 ; 23
+ A5 2 ; 32
+ V2 V6<A5 ; 32
+ V3 S3*FV4 ; 24
+ A5 142 ; 34
+ A0 A5+A3 ; 34
+ V1 ,A0,1 ;v_ld_str 34
+ A5 526 ; 34
+ A0 A5+A3 ; 34
+ V7 ,A0,1 ;v_ld_str 34
+ V0 V1+V7 ; 26
+ V6 V3<A6 ; 32
+ V4 V6+V2 ; 32
+ A6 42 ; 28
+ V7 V5>A6 ; 28
+ A5 654 ; 34
+ CPW ;cmr_vrsp 34
+ A0 A5+A3 ; 34
+ V1 ,A0,1 ;v_ld_str 34
+ A5 398 ; 34
+ A0 A5+A3 ; 34
+ V3 ,A0,1 ;v_ld_str 34
+ V6 V4+V1 ; 32
+ V2 V3>A6 ; 32
+ V5 V6+V2 ; 32
+ A6 B12 ;s_bt_sp 32
+ V4 V3<A7 ; 26
+ A7 B13 ;regs 34
+ A3 A7+A1 ; 34
+ A7 B11 ;regs 34
+ A5 A7+A1 ; 34
+ A4 A6+A1 ; 34
+ A7 A2+A1 ; 34
+ A0 A2+A1 ; 34
+ A2 128 ; 34
+ B13 A0 ;s_bt_sp 34
+ V1 V0+V4 ; 26
+ A0 B11 ;regs 31
+ ,A0,1 V1 ; 31
+ V6 V5+V7 ; 33
+ A0 A6 ;regs 33
+ ,A0,1 V6 ; 33
+ A0 B13 ;regs 34
+ A1 A2 ;regs 34
+ A2 A7 ;regs 34
+ JAN L7 ; 34
+L8 = P.* ; 34
+L5 = P.* ; 34
+ S1 0 ; 35
+ A0 B02 ; 35
+ A2 B02 ; 35
+ A1 13 ;num Bs 35
+ B66 A0 ; 35
+ B77,A1 0,A0 ; 35
+ A0 A2+A1 ; 35
+ A1 1 ;num Ts 35
+ T00,A1 0,A0 ; 35
+ J B00 ; 35
+ EXT $STKOFEN:p
+ ENTRY GMPN_MULWW
+ END
diff --git a/rts/gmp/mpn/cray/sub_n.c b/rts/gmp/mpn/cray/sub_n.c
new file mode 100644
index 0000000000..902e07a727
--- /dev/null
+++ b/rts/gmp/mpn/cray/sub_n.c
@@ -0,0 +1,97 @@
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+ For Cray vector processors.
+
+ Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_sub_n (c, a, b, n)
+ mp_ptr c;
+ mp_srcptr a, b;
+ mp_size_t n;
+{
+ mp_size_t i;
+ mp_size_t nm1 = n - 1;
+ int more_carries = 0;
+ int carry_out;
+
+ /* For small operands the non-vector code is faster. */
+ if (n < 16)
+ goto sequential;
+
+ if (a == c || b == c)
+ {
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ if (c == a)
+ {
+ /* allocate temp space for a */
+ mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ MPN_COPY (ax, a, n);
+ a = (mp_srcptr) ax;
+ }
+ if (c == b)
+ {
+ /* allocate temp space for b */
+ mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ MPN_COPY (bx, b, n);
+ b = (mp_srcptr) bx;
+ }
+ carry_out = mpn_sub_n (c, a, b, n);
+ TMP_FREE (marker);
+ return carry_out;
+ }
+
+ carry_out = a[nm1] < b[nm1];
+
+#pragma _CRI ivdep /* Cray PVP systems */
+ for (i = nm1; i > 0; i--)
+ {
+ int cy_in; mp_limb_t t;
+ cy_in = a[i - 1] < b[i - 1];
+ t = a[i] - b[i];
+ more_carries += t < cy_in;
+ c[i] = t - cy_in;
+ }
+ c[0] = a[0] - b[0];
+
+ if (more_carries)
+ {
+ /* This won't vectorize, but we should come here rarely. */
+ int cy;
+ sequential:
+ cy = 0;
+ for (i = 0; i < n; i++)
+ {
+ mp_limb_t ai, ci, t;
+ ai = a[i];
+ t = b[i] + cy;
+ cy = t < cy;
+ ci = ai - t;
+ cy += ci > ai;
+ c[i] = ci;
+ }
+ carry_out = cy;
+ }
+
+ return carry_out;
+}
diff --git a/rts/gmp/mpn/cray/submul_1.c b/rts/gmp/mpn/cray/submul_1.c
new file mode 100644
index 0000000000..4d2fb13c62
--- /dev/null
+++ b/rts/gmp/mpn/cray/submul_1.c
@@ -0,0 +1,46 @@
+/* mpn_submul_1 for Cray PVP.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+ mp_ptr p0, p1, tp;
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+
+ GMPN_MULWW (p1, p0, up, &n, &limb);
+ cy_limb = mpn_sub_n (tp, rp, p0, n);
+ rp[0] = tp[0];
+ cy_limb += mpn_sub_n (rp + 1, tp + 1, p1, n - 1);
+ cy_limb += p1[n - 1];
+
+ TMP_FREE (marker);
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/add_n.c b/rts/gmp/mpn/generic/add_n.c
new file mode 100644
index 0000000000..5fcb7e4835
--- /dev/null
+++ b/rts/gmp/mpn/generic/add_n.c
@@ -0,0 +1,62 @@
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_srcptr s2_ptr;
+ mp_size_t size;
+#endif
+{
+ register mp_limb_t x, y, cy;
+ register mp_size_t j;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do
+ {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to one addend */
+ cy = (y < cy); /* get out carry from that addition */
+ y = x + y; /* add other addend */
+ cy = (y < x) + cy; /* get out carry from that add, combine */
+ res_ptr[j] = y;
+ }
+ while (++j != 0);
+
+ return cy;
+}
diff --git a/rts/gmp/mpn/generic/addmul_1.c b/rts/gmp/mpn/generic/addmul_1.c
new file mode 100644
index 0000000000..746ae31307
--- /dev/null
+++ b/rts/gmp/mpn/generic/addmul_1.c
@@ -0,0 +1,65 @@
+/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+ by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
+ limb vector pointed to by RES_PTR. Return the most significant limb of
+ the product, adjusted for carry-out from the addition.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t cy_limb;
+ register mp_size_t j;
+ register mp_limb_t prod_high, prod_low;
+ register mp_limb_t x;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do
+ {
+ umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x + prod_low;
+ cy_limb += (prod_low < x);
+ res_ptr[j] = prod_low;
+ }
+ while (++j != 0);
+
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/addsub_n.c b/rts/gmp/mpn/generic/addsub_n.c
new file mode 100644
index 0000000000..c9bab3ef60
--- /dev/null
+++ b/rts/gmp/mpn/generic/addsub_n.c
@@ -0,0 +1,167 @@
+/* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
+
+
+/* mpn_addsub_n.
+ r1[] = s1[] + s2[]
+ r2[] = s1[] - s2[]
+ All operands have n limbs.
+ In-place operations allowed. */
+mp_limb_t
+#if __STDC__
+mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+#else
+mpn_addsub_n (r1p, r2p, s1p, s2p, n)
+ mp_ptr r1p, r2p;
+ mp_srcptr s1p, s2p;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t acyn, acyo; /* carry for add */
+ mp_limb_t scyn, scyo; /* carry for subtract */
+ mp_size_t off; /* offset in operands */
+ mp_size_t this_n; /* size of current chunk */
+
+ /* We alternatingly add and subtract in chunks that fit into the (L1)
+ cache. Since the chunks are several hundred limbs, the function call
+ overhead is insignificant, but we get much better locality. */
+
+ /* We have three variant of the inner loop, the proper loop is chosen
+ depending on whether r1 or r2 are the same operand as s1 or s2. */
+
+ if (r1p != s1p && r1p != s2p)
+ {
+ /* r1 is not identical to either input operand. We can therefore write
+ to r1 directly, without using temporary storage. */
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
+ acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+ }
+ }
+ else if (r2p != s1p && r2p != s2p)
+ {
+ /* r2 is not identical to either input operand. We can therefore write
+ to r2 directly, without using temporary storage. */
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
+ acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+ }
+ }
+ else
+ {
+ /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2=s2 or vice versa)
+ Need temporary storage. */
+ mp_limb_t tp[PART_SIZE];
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
+ acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+ MPN_COPY (r1p + off, tp, this_n);
+ }
+ }
+
+ return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)
+{
+ mp_ptr r1p, r2p, s1p, s2p;
+ double t;
+ mp_size_t n;
+
+ n = strtol (argv[1], 0, 0);
+
+ r1p = malloc (n * BYTES_PER_MP_LIMB);
+ r2p = malloc (n * BYTES_PER_MP_LIMB);
+ s1p = malloc (n * BYTES_PER_MP_LIMB);
+ s2p = malloc (n * BYTES_PER_MP_LIMB);
+ TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+ printf (" separate add and sub: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
+ printf ("combined addsub separate variables: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
+ printf (" combined addsub r1 overlap: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
+ printf (" combined addsub r2 overlap: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
+ printf (" combined addsub in-place: %.3f\n", t);
+
+ return 0;
+}
+#endif
diff --git a/rts/gmp/mpn/generic/bdivmod.c b/rts/gmp/mpn/generic/bdivmod.c
new file mode 100644
index 0000000000..c4bcb414e6
--- /dev/null
+++ b/rts/gmp/mpn/generic/bdivmod.c
@@ -0,0 +1,120 @@
+/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
+
+ Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
+ returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
+
+ Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the
+ low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
+ the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
+
+ Preconditions:
+ 1. V is odd.
+ 2. usize * BITS_PER_MP_LIMB >= d.
+ 3. If Q and U overlap, qp <= up.
+
+ Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+ 301314194-2, and was done while I was a visiting reseacher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ References:
+ T. Jebelean, An algorithm for exact division, Journal of Symbolic
+ Computation, v. 15, 1993, pp. 169-180.
+
+ K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
+ Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+#if __STDC__
+mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
+ mp_srcptr vp, mp_size_t vsize, unsigned long int d)
+#else
+mpn_bdivmod (qp, up, usize, vp, vsize, d)
+ mp_ptr qp;
+ mp_ptr up;
+ mp_size_t usize;
+ mp_srcptr vp;
+ mp_size_t vsize;
+ unsigned long int d;
+#endif
+{
+ mp_limb_t v_inv;
+
+ /* 1/V mod 2^BITS_PER_MP_LIMB. */
+ modlimb_invert (v_inv, vp[0]);
+
+ /* Fast code for two cases previously used by the accel part of mpn_gcd.
+ (Could probably remove this now it's inlined there.) */
+ if (usize == 2 && vsize == 2 &&
+ (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB))
+ {
+ mp_limb_t hi, lo;
+ mp_limb_t q = up[0] * v_inv;
+ umul_ppmm (hi, lo, q, vp[0]);
+ up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q;
+ if (d == 2*BITS_PER_MP_LIMB)
+ q = up[1] * v_inv, up[1] = 0, qp[1] = q;
+ return 0;
+ }
+
+ /* Main loop. */
+ while (d >= BITS_PER_MP_LIMB)
+ {
+ mp_limb_t q = up[0] * v_inv;
+ mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+ if (usize > vsize)
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ d -= BITS_PER_MP_LIMB;
+ up += 1, usize -= 1;
+ *qp++ = q;
+ }
+
+ if (d)
+ {
+ mp_limb_t b;
+ mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
+ if (q <= 1)
+ {
+ if (q == 0)
+ return 0;
+ else
+ b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
+ }
+ else
+ b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+
+ if (usize > vsize)
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ return q;
+ }
+
+ return 0;
+}
diff --git a/rts/gmp/mpn/generic/bz_divrem_n.c b/rts/gmp/mpn/generic/bz_divrem_n.c
new file mode 100644
index 0000000000..d234b22af5
--- /dev/null
+++ b/rts/gmp/mpn/generic/bz_divrem_n.c
@@ -0,0 +1,153 @@
+/* mpn_bz_divrem_n and auxilliary routines.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
+ Technical report MPI-I-98-1-022, october 1998.
+ http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
+*/
+
+static mp_limb_t mpn_bz_div_3_halves_by_2
+ _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n));
+
+
+/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
+ div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
+ i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
+#endif
+
+#if 0
+static
+unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+{
+ /* This might be useful: */
+ if (qxn != 0)
+ {
+ mp_limb_t c;
+ mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp + qxn - nn, np, nn);
+ MPN_ZERO (tp, qxn);
+ c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
+ /* Maybe copy proper part of tp to np? Documentation is unclear about
+ the returned np value when qxn != 0 */
+ return c;
+ }
+}
+#endif
+
+
+/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
+ by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
+ Returns most significant limb of the quotient, which is 0 or 1.
+ Requires that the most significant bit of the divisor is set. */
+
+mp_limb_t
+#if __STDC__
+mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
+#else
+mpn_bz_divrem_n (qp, np, dp, n)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t qhl, cc;
+
+ if (n % 2 != 0)
+ {
+ qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
+ cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]);
+ cc = mpn_sub_1 (np + n, np + n, 1, cc);
+ if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
+ while (cc)
+ {
+ qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1);
+ cc -= mpn_add_n (np + 1, np + 1, dp, n);
+ }
+ qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
+ mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
+ }
+ else
+ {
+ mp_size_t n2 = n/2;
+ qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2);
+ qhl += mpn_add_1 (qp + n2, qp + n2, n2,
+ mpn_bz_div_3_halves_by_2 (qp, np, dp, n2));
+ }
+ return qhl;
+}
+
+
+/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
+ the remainder in (np, 2n) */
+
+static mp_limb_t
+#if __STDC__
+mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
+#else
+mpn_bz_div_3_halves_by_2 (qp, np, dp, n)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+#endif
+{
+ mp_size_t twon = n + n;
+ mp_limb_t qhl, cc;
+ mp_ptr tmp;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ if (n < BZ_THRESHOLD)
+ qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
+ else
+ qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n);
+ tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB);
+ mpn_mul_n (tmp, qp, dp, n);
+ cc = mpn_sub_n (np, np, tmp, twon);
+ TMP_FREE (marker);
+ if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n);
+ while (cc)
+ {
+ qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
+ cc -= mpn_add_n (np, np, dp, twon);
+ }
+ return qhl;
+}
diff --git a/rts/gmp/mpn/generic/cmp.c b/rts/gmp/mpn/generic/cmp.c
new file mode 100644
index 0000000000..8e9792f54e
--- /dev/null
+++ b/rts/gmp/mpn/generic/cmp.c
@@ -0,0 +1,56 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ There are no restrictions on the relative sizes of
+ the two arguments.
+ Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */
+
+int
+#if __STDC__
+mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
+#else
+mpn_cmp (op1_ptr, op2_ptr, size)
+ mp_srcptr op1_ptr;
+ mp_srcptr op2_ptr;
+ mp_size_t size;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t op1_word, op2_word;
+
+ for (i = size - 1; i >= 0; i--)
+ {
+ op1_word = op1_ptr[i];
+ op2_word = op2_ptr[i];
+ if (op1_word != op2_word)
+ goto diff;
+ }
+ return 0;
+ diff:
+ /* This can *not* be simplified to
+ op2_word - op2_word
+ since that expression might give signed overflow. */
+ return (op1_word > op2_word) ? 1 : -1;
+}
diff --git a/rts/gmp/mpn/generic/diveby3.c b/rts/gmp/mpn/generic/diveby3.c
new file mode 100644
index 0000000000..a2fb552bfa
--- /dev/null
+++ b/rts/gmp/mpn/generic/diveby3.c
@@ -0,0 +1,77 @@
+/* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
+ 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
+#define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1)
+
+
+/* The "c += ..."s are adding the high limb of 3*l to c. That high limb
+ will be 0, 1 or 2. Doing two separate "+="s seems to turn out better
+ code on gcc (as of 2.95.2 at least).
+
+ When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a
+ limb value of either 0xFF...FF or 0xFF...FE and the multiply by INVERSE_3
+ gives 0x55...55 or 0xAA...AA respectively, producing a further borrow of
+ only 0 or 1 respectively. Hence the carry out of each stage and for the
+ return value is always only 0, 1 or 2. */
+
+mp_limb_t
+#if __STDC__
+mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c)
+#else
+mpn_divexact_by3c (dst, src, size, c)
+ mp_ptr dst;
+ mp_srcptr src;
+ mp_size_t size;
+ mp_limb_t c;
+#endif
+{
+ mp_size_t i;
+
+ ASSERT (size >= 1);
+
+ i = 0;
+ do
+ {
+ mp_limb_t l, s;
+
+ s = src[i];
+ l = s - c;
+ c = (l > s);
+
+ l *= INVERSE_3;
+ dst[i] = l;
+
+ c += (l > MP_LIMB_T_MAX/3);
+ c += (l > (MP_LIMB_T_MAX/3)*2);
+ }
+ while (++i < size);
+
+ return c;
+}
diff --git a/rts/gmp/mpn/generic/divrem.c b/rts/gmp/mpn/generic/divrem.c
new file mode 100644
index 0000000000..30673e76d9
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem.c
@@ -0,0 +1,101 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+ quotient. This is now just a middle layer for calling the new
+ internal mpn_tdiv_qr.
+
+Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+#if __STDC__
+mpn_divrem (mp_ptr qp, mp_size_t qxn,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+#else
+mpn_divrem (qp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+#endif
+{
+ if (dn == 1)
+ {
+ mp_limb_t ret;
+ mp_ptr q2p;
+ mp_size_t qn;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
+
+ np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
+ qn = nn + qxn - 1;
+ MPN_COPY (qp, q2p, qn);
+ ret = q2p[qn];
+
+ TMP_FREE (marker);
+ return ret;
+ }
+ else if (dn == 2)
+ {
+ return mpn_divrem_2 (qp, qxn, np, nn, dp);
+ }
+ else
+ {
+ mp_ptr rp, q2p;
+ mp_limb_t qhl;
+ mp_size_t qn;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ if (qxn != 0)
+ {
+ mp_ptr n2p;
+ n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
+ MPN_ZERO (n2p, qxn);
+ MPN_COPY (n2p + qxn, np, nn);
+ q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
+ rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
+ MPN_COPY (np, rp, dn);
+ qn = nn - dn + qxn;
+ MPN_COPY (qp, q2p, qn);
+ qhl = q2p[qn];
+ }
+ else
+ {
+ q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
+ rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
+ MPN_COPY (np, rp, dn); /* overwrite np area with remainder */
+ qn = nn - dn;
+ MPN_COPY (qp, q2p, qn);
+ qhl = q2p[qn];
+ }
+ TMP_FREE (marker);
+ return qhl;
+ }
+}
diff --git a/rts/gmp/mpn/generic/divrem_1.c b/rts/gmp/mpn/generic/divrem_1.c
new file mode 100644
index 0000000000..e93f241c9d
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem_1.c
@@ -0,0 +1,248 @@
+/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+ QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* __gmpn_divmod_1_internal(quot_ptr,dividend_ptr,dividend_size,divisor_limb)
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+ QUOT_PTR and DIVIDEND_PTR might point to the same limb. */
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+static mp_limb_t
+#if __STDC__
+__gmpn_divmod_1_internal (mp_ptr quot_ptr,
+ mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb_t divisor_limb)
+#else
+__gmpn_divmod_1_internal (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+ mp_ptr quot_ptr;
+ mp_srcptr dividend_ptr;
+ mp_size_t dividend_size;
+ mp_limb_t divisor_limb;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t n1, n0, r;
+ int dummy;
+
+ /* ??? Should this be handled at all? Rely on callers? */
+ if (dividend_size == 0)
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ dividend is large, pre-invert the divisor, and use
+ only multiplications in the inner loop. */
+
+ /* This test should be read:
+ Does it ever help to use udiv_qrnnd_preinv?
+ && Does what we save compensate for the inversion overhead? */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if (normalization_steps != 0)
+ {
+ mp_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+ invert_limb (divisor_limb_inverted, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ if (r == 0
+ && divisor_limb > ((n1 << normalization_steps)
+ | (dividend_ptr[dividend_size - 2] >> ...)))
+ ...one division less... */
+
+ for (i = dividend_size - 2; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+ divisor_limb, divisor_limb_inverted);
+ n1 = n0;
+ }
+ udiv_qrnnd_preinv (quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ }
+ else
+ {
+ mp_limb_t divisor_limb_inverted;
+
+ invert_limb (divisor_limb_inverted, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ {
+ quot_ptr[i] = 0;
+ i--;
+ }
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (quot_ptr[i], r, r,
+ n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+ }
+ }
+ else
+ {
+ if (UDIV_NEEDS_NORMALIZATION)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if (normalization_steps != 0)
+ {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ if (r == 0
+ && divisor_limb > ((n1 << normalization_steps)
+ | (dividend_ptr[dividend_size - 2] >> ...)))
+ ...one division less... */
+
+ for (i = dividend_size - 2; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd (quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ it, or because DIVISOR_LIMB is already normalized. */
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ {
+ quot_ptr[i] = 0;
+ i--;
+ }
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
+ }
+ return r;
+ }
+}
+
+
+
+mp_limb_t
+#if __STDC__
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+ mp_srcptr np, mp_size_t nn,
+ mp_limb_t d)
+#else
+mpn_divrem_1 (qp, qxn, np, nn, d)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_srcptr np;
+ mp_size_t nn;
+ mp_limb_t d;
+#endif
+{
+ mp_limb_t rlimb;
+ mp_size_t i;
+
+ /* Develop integer part of quotient. */
+ rlimb = __gmpn_divmod_1_internal (qp + qxn, np, nn, d);
+
+ /* Develop fraction part of quotient. This is not as fast as it should;
+ the preinvert stuff from __gmpn_divmod_1_internal ought to be used here
+ too. */
+ if (UDIV_NEEDS_NORMALIZATION)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, d);
+ if (normalization_steps != 0)
+ {
+ d <<= normalization_steps;
+ rlimb <<= normalization_steps;
+
+ for (i = qxn - 1; i >= 0; i--)
+ udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
+
+ return rlimb >> normalization_steps;
+ }
+ else
+ /* fall out */
+ ;
+ }
+
+ for (i = qxn - 1; i >= 0; i--)
+ udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
+
+ return rlimb;
+}
diff --git a/rts/gmp/mpn/generic/divrem_2.c b/rts/gmp/mpn/generic/divrem_2.c
new file mode 100644
index 0000000000..0bc31ae2e7
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem_2.c
@@ -0,0 +1,151 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+ quotient. The divisor is two limbs.
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright (C) 1993, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide num (NP/NSIZE) by den (DP/2) and write
+ the NSIZE-2 least significant quotient limbs at QP
+ and the 2 long remainder at NP. If QEXTRA_LIMBS is
+ non-zero, generate that many fraction bits and append them after the
+ other quotient limbs.
+ Return the most significant limb of the quotient, this is always 0 or 1.
+
+ Preconditions:
+ 0. NSIZE >= 2.
+ 1. The most significant bit of the divisor must be set.
+ 2. QP must either not overlap with the input operands at all, or
+ QP + 2 >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.
+ 3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. */
+
+mp_limb_t
+#if __STDC__
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+ mp_ptr np, mp_size_t nsize,
+ mp_srcptr dp)
+#else
+mpn_divrem_2 (qp, qxn, np, nsize, dp)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nsize;
+ mp_srcptr dp;
+#endif
+{
+ mp_limb_t most_significant_q_limb = 0;
+ mp_size_t i;
+ mp_limb_t n1, n0, n2;
+ mp_limb_t d1, d0;
+ mp_limb_t d1inv;
+ int have_preinv;
+
+ np += nsize - 2;
+ d1 = dp[1];
+ d0 = dp[0];
+ n1 = np[1];
+ n0 = np[0];
+
+ if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+ {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ most_significant_q_limb = 1;
+ }
+
+ /* If multiplication is much faster than division, preinvert the most
+ significant divisor limb before entering the loop. */
+ if (UDIV_TIME > 2 * UMUL_TIME + 6)
+ {
+ have_preinv = 0;
+ if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - 2) > UDIV_TIME)
+ {
+ invert_limb (d1inv, d1);
+ have_preinv = 1;
+ }
+ }
+
+ for (i = qxn + nsize - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ mp_limb_t r;
+
+ if (i >= qxn)
+ np--;
+ else
+ np[0] = 0;
+
+ if (n1 == d1)
+ {
+ /* Q should be either 111..111 or 111..110. Need special treatment
+ of this rare case as normal division would give overflow. */
+ q = ~(mp_limb_t) 0;
+
+ r = n0 + d1;
+ if (r < d1) /* Carry in the addition? */
+ {
+ add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+ qp[i] = q;
+ continue;
+ }
+ n1 = d0 - (d0 != 0);
+ n0 = -d0;
+ }
+ else
+ {
+ if (UDIV_TIME > 2 * UMUL_TIME + 6 && have_preinv)
+ udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
+ else
+ udiv_qrnnd (q, r, n1, n0, d1);
+ umul_ppmm (n1, n0, d0, q);
+ }
+
+ n2 = np[0];
+
+ q_test:
+ if (n1 > r || (n1 == r && n0 > n2))
+ {
+ /* The estimated Q was too large. */
+ q--;
+
+ sub_ddmmss (n1, n0, n1, n0, 0, d0);
+ r += d1;
+ if (r >= d1) /* If not carry, test Q again. */
+ goto q_test;
+ }
+
+ qp[i] = q;
+ sub_ddmmss (n1, n0, r, n2, n1, n0);
+ }
+ np[1] = n1;
+ np[0] = n0;
+
+ return most_significant_q_limb;
+}
diff --git a/rts/gmp/mpn/generic/dump.c b/rts/gmp/mpn/generic/dump.c
new file mode 100644
index 0000000000..66f375c74b
--- /dev/null
+++ b/rts/gmp/mpn/generic/dump.c
@@ -0,0 +1,76 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS NOT SAFE TO
+ CALL THIS FUNCTION DIRECTLY. IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+ FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpn_dump (mp_srcptr ptr, mp_size_t size)
+#else
+mpn_dump (ptr, size)
+ mp_srcptr ptr;
+ mp_size_t size;
+#endif
+{
+ MPN_NORMALIZE (ptr, size);
+
+ if (size == 0)
+ printf ("0\n");
+ else
+ {
+ size--;
+ if (BYTES_PER_MP_LIMB > sizeof (long))
+ {
+ if ((ptr[size] >> BITS_PER_MP_LIMB/2) != 0)
+ {
+ printf ("%lX",
+ (unsigned long) (ptr[size] >> BITS_PER_MP_LIMB/2));
+ printf ("%0*lX", (int) (BYTES_PER_MP_LIMB),
+ (unsigned long) ptr[size]);
+ }
+ else
+ printf ("%lX", (unsigned long) ptr[size]);
+ }
+ else
+ printf ("%lX", ptr[size]);
+
+ while (size)
+ {
+ size--;
+ if (BYTES_PER_MP_LIMB > sizeof (long))
+ {
+ printf ("%0*lX", (int) (BYTES_PER_MP_LIMB),
+ (unsigned long) (ptr[size] >> BITS_PER_MP_LIMB/2));
+ printf ("%0*lX", (int) (BYTES_PER_MP_LIMB),
+ (unsigned long) ptr[size]);
+ }
+ else
+ printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]);
+ }
+ printf ("\n");
+ }
+}
diff --git a/rts/gmp/mpn/generic/gcd.c b/rts/gmp/mpn/generic/gcd.c
new file mode 100644
index 0000000000..059e219a06
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcd.c
@@ -0,0 +1,414 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Integer greatest common divisor of two unsigned integers, using
+ the accelerated algorithm (see reference below).
+
+ mp_size_t mpn_gcd (up, usize, vp, vsize).
+
+ Preconditions [U = (up, usize) and V = (vp, vsize)]:
+
+ 1. V is odd.
+ 2. numbits(U) >= numbits(V).
+
+ Both U and V are destroyed by the operation. The result is left at vp,
+ and its size is returned.
+
+ Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+ 301314194-2, and was done while I was a visiting reseacher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ Refer to
+ K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
+ Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
+ algorithm is used, otherwise the binary algorithm is used. This may be
+ adjusted for different architectures. */
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 5
+#endif
+
+/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
+ algorithm reduces using the bmod operation. Otherwise, the k-ary reduction
+ is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */
+enum
+ {
+ BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
+ };
+
+
+/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
+ Both U and V must be odd. */
+static __gmp_inline mp_size_t
+#if __STDC__
+gcd_2 (mp_ptr vp, mp_srcptr up)
+#else
+gcd_2 (vp, up)
+ mp_ptr vp;
+ mp_srcptr up;
+#endif
+{
+ mp_limb_t u0, u1, v0, v1;
+ mp_size_t vsize;
+
+ u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
+
+ while (u1 != v1 && u0 != v0)
+ {
+ unsigned long int r;
+ if (u1 > v1)
+ {
+ u1 -= v1 + (u0 < v0), u0 -= v0;
+ count_trailing_zeros (r, u0);
+ u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
+ u1 >>= r;
+ }
+ else /* u1 < v1. */
+ {
+ v1 -= u1 + (v0 < u0), v0 -= u0;
+ count_trailing_zeros (r, v0);
+ v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
+ v1 >>= r;
+ }
+ }
+
+ vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
+
+ /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
+ if (u1 == v1 && u0 == v0)
+ return vsize;
+
+ v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
+ vp[0] = mpn_gcd_1 (vp, vsize, v0);
+
+ return 1;
+}
+
+/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
+ 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
+ In the reference article, D was computed along with N, but it is better to
+ compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
+ the result as a twos' complement signed integer.
+
+ Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference
+ article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
+ 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
+ precision. If N2 > N1 initially, the first iteration of the while loop
+ will swap them. In all other situations, N1 >= N2 is maintained. */
+
+static
+#if ! defined (__i386__)
+__gmp_inline /* don't inline this for the x86 */
+#endif
+mp_limb_t
+#if __STDC__
+find_a (mp_srcptr cp)
+#else
+find_a (cp)
+ mp_srcptr cp;
+#endif
+{
+ unsigned long int leading_zero_bits = 0;
+
+ mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */
+ mp_limb_t n1_h = cp[1];
+
+ mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */
+ mp_limb_t n2_h = ~n1_h;
+
+ /* Main loop. */
+ while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */
+ {
+ /* N1 <-- N1 % N2. */
+ if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
+ {
+ unsigned long int i;
+ count_leading_zeros (i, n2_h);
+ i -= leading_zero_bits, leading_zero_bits += i;
+ n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
+ do
+ {
+ if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+ n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+ n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
+ i -= 1;
+ }
+ while (i);
+ }
+ if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+ n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+
+ MP_LIMB_T_SWAP (n1_h, n2_h);
+ MP_LIMB_T_SWAP (n1_l, n2_l);
+ }
+
+ return n2_l;
+}
+
+mp_size_t
+#if __STDC__
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (gp, up, usize, vp, vsize)
+ mp_ptr gp;
+ mp_ptr up;
+ mp_size_t usize;
+ mp_ptr vp;
+ mp_size_t vsize;
+#endif
+{
+ mp_ptr orig_vp = vp;
+ mp_size_t orig_vsize = vsize;
+ int binary_gcd_ctr; /* Number of times binary gcd will execute. */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
+ Two EXTRA limbs for U and V are required for kary reduction. */
+ if (vsize >= GCD_ACCEL_THRESHOLD)
+ {
+ unsigned long int vbitsize, d;
+ mp_ptr orig_up = up;
+ mp_size_t orig_usize = usize;
+ mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
+
+ MPN_COPY (anchor_up, orig_up, usize);
+ up = anchor_up;
+
+ count_leading_zeros (d, up[usize-1]);
+ d = usize * BITS_PER_MP_LIMB - d;
+ count_leading_zeros (vbitsize, vp[vsize-1]);
+ vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+ d = d - vbitsize + 1;
+
+ /* Use bmod reduction to quickly discover whether V divides U. */
+ up[usize++] = 0; /* Insert leading zero. */
+ mpn_bdivmod (up, up, usize, vp, vsize, d);
+
+ /* Now skip U/V mod 2^d and any low zero limbs. */
+ d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+ while (usize != 0 && up[0] == 0)
+ up++, usize--;
+
+ if (usize == 0) /* GCD == ORIG_V. */
+ goto done;
+
+ vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
+ MPN_COPY (vp, orig_vp, vsize);
+
+ do /* Main loop. */
+ {
+ /* mpn_com_n can't be used here because anchor_up and up may
+ partially overlap */
+ if (up[usize-1] & MP_LIMB_T_HIGHBIT) /* U < 0; take twos' compl. */
+ {
+ mp_size_t i;
+ anchor_up[0] = -up[0];
+ for (i = 1; i < usize; i++)
+ anchor_up[i] = ~up[i];
+ up = anchor_up;
+ }
+
+ MPN_NORMALIZE_NOT_ZERO (up, usize);
+
+ if ((up[0] & 1) == 0) /* Result even; remove twos. */
+ {
+ unsigned int r;
+ count_trailing_zeros (r, up[0]);
+ mpn_rshift (anchor_up, up, usize, r);
+ usize -= (anchor_up[usize-1] == 0);
+ }
+ else if (anchor_up != up)
+ MPN_COPY_INCR (anchor_up, up, usize);
+
+ MPN_PTR_SWAP (anchor_up,usize, vp,vsize);
+ up = anchor_up;
+
+ if (vsize <= 2) /* Kary can't handle < 2 limbs and */
+ break; /* isn't efficient for == 2 limbs. */
+
+ d = vbitsize;
+ count_leading_zeros (vbitsize, vp[vsize-1]);
+ vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+ d = d - vbitsize + 1;
+
+ if (d > BMOD_THRESHOLD) /* Bmod reduction. */
+ {
+ up[usize++] = 0;
+ mpn_bdivmod (up, up, usize, vp, vsize, d);
+ d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+ }
+ else /* Kary reduction. */
+ {
+ mp_limb_t bp[2], cp[2];
+
+ /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */
+ {
+ mp_limb_t u_inv, hi, lo;
+ modlimb_invert (u_inv, up[0]);
+ cp[0] = vp[0] * u_inv;
+ umul_ppmm (hi, lo, cp[0], up[0]);
+ cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv;
+ }
+
+ /* U <-- find_a (C) * U. */
+ up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
+ usize++;
+
+ /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
+ bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
+ bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2
+
+ Like V/U above, but simplified because only the low bit of
+ bp[1] is wanted. */
+ {
+ mp_limb_t v_inv, hi, lo;
+ modlimb_invert (v_inv, vp[0]);
+ bp[0] = up[0] * v_inv;
+ umul_ppmm (hi, lo, bp[0], vp[0]);
+ bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1;
+ }
+
+ up[usize++] = 0;
+ if (bp[1]) /* B < 0: U <-- U + (-B) * V. */
+ {
+ mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
+ mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
+ }
+ else /* B >= 0: U <-- U - B * V. */
+ {
+ mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ }
+
+ up += 2, usize -= 2; /* At least two low limbs are zero. */
+ }
+
+ /* Must remove low zero limbs before complementing. */
+ while (usize != 0 && up[0] == 0)
+ up++, usize--;
+ }
+ while (usize);
+
+ /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */
+ up = orig_up, usize = orig_usize;
+ binary_gcd_ctr = 2;
+ }
+ else
+ binary_gcd_ctr = 1;
+
+ /* Finish up with the binary algorithm. Executes once or twice. */
+ for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
+ {
+ if (usize > 2) /* First make U close to V in size. */
+ {
+ unsigned long int vbitsize, d;
+ count_leading_zeros (d, up[usize-1]);
+ d = usize * BITS_PER_MP_LIMB - d;
+ count_leading_zeros (vbitsize, vp[vsize-1]);
+ vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+ d = d - vbitsize - 1;
+ if (d != -(unsigned long int)1 && d > 2)
+ {
+ mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */
+ d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
+ }
+ }
+
+ /* Start binary GCD. */
+ do
+ {
+ mp_size_t zeros;
+
+ /* Make sure U is odd. */
+ MPN_NORMALIZE (up, usize);
+ while (up[0] == 0)
+ up += 1, usize -= 1;
+ if ((up[0] & 1) == 0)
+ {
+ unsigned int r;
+ count_trailing_zeros (r, up[0]);
+ mpn_rshift (up, up, usize, r);
+ usize -= (up[usize-1] == 0);
+ }
+
+ /* Keep usize >= vsize. */
+ if (usize < vsize)
+ MPN_PTR_SWAP (up, usize, vp, vsize);
+
+ if (usize <= 2) /* Double precision. */
+ {
+ if (vsize == 1)
+ vp[0] = mpn_gcd_1 (up, usize, vp[0]);
+ else
+ vsize = gcd_2 (vp, up);
+ break; /* Binary GCD done. */
+ }
+
+ /* Count number of low zero limbs of U - V. */
+ for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
+ continue;
+
+ /* If U < V, swap U and V; in any case, subtract V from U. */
+ if (zeros == vsize) /* Subtract done. */
+ up += zeros, usize -= zeros;
+ else if (usize == vsize)
+ {
+ mp_size_t size = vsize;
+ do
+ size--;
+ while (up[size] == vp[size]);
+ if (up[size] < vp[size]) /* usize == vsize. */
+ MP_PTR_SWAP (up, vp);
+ up += zeros, usize = size + 1 - zeros;
+ mpn_sub_n (up, up, vp + zeros, usize);
+ }
+ else
+ {
+ mp_size_t size = vsize - zeros;
+ up += zeros, usize -= zeros;
+ if (mpn_sub_n (up, up, vp + zeros, size))
+ {
+ while (up[size] == 0) /* Propagate borrow. */
+ up[size++] = -(mp_limb_t)1;
+ up[size] -= 1;
+ }
+ }
+ }
+ while (usize); /* End binary GCD. */
+ }
+
+done:
+ if (vp != gp)
+ MPN_COPY (gp, vp, vsize);
+ TMP_FREE (marker);
+ return vsize;
+}
diff --git a/rts/gmp/mpn/generic/gcd_1.c b/rts/gmp/mpn/generic/gcd_1.c
new file mode 100644
index 0000000000..1832636636
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcd_1.c
@@ -0,0 +1,77 @@
+/* mpn_gcd_1 --
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0. It would be tough to make it work for
+ V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t. */
+
+mp_limb_t
+#if __STDC__
+mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
+#else
+mpn_gcd_1 (up, size, vlimb)
+ mp_srcptr up;
+ mp_size_t size;
+ mp_limb_t vlimb;
+#endif
+{
+ mp_limb_t ulimb;
+ unsigned long int u_low_zero_bits, v_low_zero_bits;
+
+ if (size > 1)
+ {
+ ulimb = mpn_mod_1 (up, size, vlimb);
+ if (ulimb == 0)
+ return vlimb;
+ }
+ else
+ ulimb = up[0];
+
+ /* Need to eliminate low zero bits. */
+ count_trailing_zeros (u_low_zero_bits, ulimb);
+ ulimb >>= u_low_zero_bits;
+
+ count_trailing_zeros (v_low_zero_bits, vlimb);
+ vlimb >>= v_low_zero_bits;
+
+ while (ulimb != vlimb)
+ {
+ if (ulimb > vlimb)
+ {
+ ulimb -= vlimb;
+ do
+ ulimb >>= 1;
+ while ((ulimb & 1) == 0);
+ }
+ else /* vlimb > ulimb. */
+ {
+ vlimb -= ulimb;
+ do
+ vlimb >>= 1;
+ while ((vlimb & 1) == 0);
+ }
+ }
+
+ return ulimb << MIN (u_low_zero_bits, v_low_zero_bits);
+}
diff --git a/rts/gmp/mpn/generic/gcdext.c b/rts/gmp/mpn/generic/gcdext.c
new file mode 100644
index 0000000000..fe22d779a6
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcdext.c
@@ -0,0 +1,700 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright (C) 1996, 1998, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 17
+#endif
+
+#ifndef EXTEND
+#define EXTEND 1
+#endif
+
+#if STAT
+int arr[BITS_PER_MP_LIMB];
+#endif
+
+
+/* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE)
+
+ Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the
+ greatest common divisor at GP (unless it is 0), and the first cofactor at
+ SP. Write the size of the cofactor through the pointer SSIZE. Return the
+ size of the value at GP. Note that SP might be a negative number; this is
+ denoted by storing the negative of the size through SSIZE.
+
+ {UP,USIZE} and {VP,VSIZE} are both clobbered.
+
+ The space allocation for all four areas needs to be USIZE+1.
+
+ Preconditions: 1) U >= V.
+ 2) V > 0. */
+
+/* We use Lehmer's algorithm. The idea is to extract the most significant
+ bits of the operands, and compute the continued fraction for them. We then
+ apply the gathered cofactors to the full operands.
+
+ Idea 1: After we have performed a full division, don't shift operands back,
+ but instead account for the extra factors-of-2 thus introduced.
+ Idea 2: Simple generalization to use divide-and-conquer would give us an
+ algorithm that runs faster than O(n^2).
+ Idea 3: The input numbers need less space as the computation progresses,
+ while the s0 and s1 variables need more space. To save memory, we
+ could make them share space, and have the latter variables grow
+ into the former.
+ Idea 4: We should not do double-limb arithmetic from the start. Instead,
+ do things in single-limb arithmetic until the quotients differ,
+ and then switch to double-limb arithmetic. */
+
+
+/* Division optimized for small quotients. If the quotient is more than one limb,
+ store 1 in *qh and return 0. */
+static mp_limb_t
+#if __STDC__
+div2 (mp_limb_t *qh, mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0)
+#else
+div2 (qh, n1, n0, d1, d0)
+ mp_limb_t *qh;
+ mp_limb_t n1;
+ mp_limb_t n0;
+ mp_limb_t d1;
+ mp_limb_t d0;
+#endif
+{
+ if (d1 == 0)
+ {
+ *qh = 1;
+ return 0;
+ }
+
+ if ((mp_limb_signed_t) n1 < 0)
+ {
+ mp_limb_t q;
+ int cnt;
+ for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++)
+ {
+ d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ q <<= 1;
+ if (n1 > d1 || (n1 == d1 && n0 >= d0))
+ {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ q |= 1;
+ }
+ d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
+ d1 = d1 >> 1;
+ cnt--;
+ }
+
+ *qh = 0;
+ return q;
+ }
+ else
+ {
+ mp_limb_t q;
+ int cnt;
+ for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++)
+ {
+ d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
+ d1 = d1 >> 1;
+ q <<= 1;
+ if (n1 > d1 || (n1 == d1 && n0 >= d0))
+ {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ q |= 1;
+ }
+ cnt--;
+ }
+
+ *qh = 0;
+ return q;
+ }
+}
+
+mp_size_t
+#if EXTEND
+#if __STDC__
+mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
+ mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcdext (gp, s0p, s0size, up, size, vp, vsize)
+ mp_ptr gp;
+ mp_ptr s0p;
+ mp_size_t *s0size;
+ mp_ptr up;
+ mp_size_t size;
+ mp_ptr vp;
+ mp_size_t vsize;
+#endif
+#else
+#if __STDC__
+mpn_gcd (mp_ptr gp,
+ mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (gp, up, size, vp, vsize)
+ mp_ptr gp;
+ mp_ptr up;
+ mp_size_t size;
+ mp_ptr vp;
+ mp_size_t vsize;
+#endif
+#endif
+{
+ mp_limb_t A, B, C, D;
+ int cnt;
+ mp_ptr tp, wp;
+#if RECORD
+ mp_limb_t max = 0;
+#endif
+#if EXTEND
+ mp_ptr s1p;
+ mp_ptr orig_s0p = s0p;
+ mp_size_t ssize;
+ int sign = 1;
+#endif
+ int use_double_flag;
+ TMP_DECL (mark);
+
+ TMP_MARK (mark);
+
+ use_double_flag = (size >= GCDEXT_THRESHOLD);
+
+ tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+ wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+#if EXTEND
+ s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+
+ MPN_ZERO (s0p, size);
+ MPN_ZERO (s1p, size);
+
+ s0p[0] = 1;
+ s1p[0] = 0;
+ ssize = 1;
+#endif
+
+ if (size > vsize)
+ {
+ /* Normalize V (and shift up U the same amount). */
+ count_leading_zeros (cnt, vp[vsize - 1]);
+ if (cnt != 0)
+ {
+ mp_limb_t cy;
+ mpn_lshift (vp, vp, vsize, cnt);
+ cy = mpn_lshift (up, up, size, cnt);
+ up[size] = cy;
+ size += cy != 0;
+ }
+
+ mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+ /* This is really what it boils down to in this case... */
+ s0p[0] = 0;
+ s1p[0] = 1;
+ sign = -sign;
+#endif
+ size = vsize;
+ if (cnt != 0)
+ {
+ mpn_rshift (up, up, size, cnt);
+ mpn_rshift (vp, vp, size, cnt);
+ }
+ MP_PTR_SWAP (up, vp);
+ }
+
+ for (;;)
+ {
+ mp_limb_t asign;
+ /* Figure out exact size of V. */
+ vsize = size;
+ MPN_NORMALIZE (vp, vsize);
+ if (vsize <= 1)
+ break;
+
+ if (use_double_flag)
+ {
+ mp_limb_t uh, vh, ul, vl;
+ /* Let UH,UL be the most significant limbs of U, and let VH,VL be
+ the corresponding bits from V. */
+ uh = up[size - 1];
+ vh = vp[size - 1];
+ ul = up[size - 2];
+ vl = vp[size - 2];
+ count_leading_zeros (cnt, uh);
+ if (cnt != 0)
+ {
+ uh = (uh << cnt) | (ul >> (BITS_PER_MP_LIMB - cnt));
+ vh = (vh << cnt) | (vl >> (BITS_PER_MP_LIMB - cnt));
+ vl <<= cnt;
+ ul <<= cnt;
+ if (size >= 3)
+ {
+ ul |= (up[size - 3] >> (BITS_PER_MP_LIMB - cnt));
+ vl |= (vp[size - 3] >> (BITS_PER_MP_LIMB - cnt));
+ }
+ }
+
+ A = 1;
+ B = 0;
+ C = 0;
+ D = 1;
+
+ asign = 0;
+ for (;;)
+ {
+ mp_limb_t T;
+ mp_limb_t qh, q1, q2;
+ mp_limb_t nh, nl, dh, dl;
+ mp_limb_t t1, t0;
+ mp_limb_t Th, Tl;
+
+ sub_ddmmss (dh, dl, vh, vl, 0, C);
+ if ((dl | dh) == 0)
+ break;
+ add_ssaaaa (nh, nl, uh, ul, 0, A);
+ q1 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ add_ssaaaa (dh, dl, vh, vl, 0, D);
+ if ((dl | dh) == 0)
+ break;
+ sub_ddmmss (nh, nl, uh, ul, 0, B);
+ q2 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ if (q1 != q2)
+ break;
+
+ asign = ~asign;
+
+ T = A + q1 * C;
+ A = C;
+ C = T;
+ T = B + q1 * D;
+ B = D;
+ D = T;
+ umul_ppmm (t1, t0, q1, vl);
+ t1 += q1 * vh;
+ sub_ddmmss (Th, Tl, uh, ul, t1, t0);
+ uh = vh, ul = vl;
+ vh = Th, vl = Tl;
+
+ add_ssaaaa (dh, dl, vh, vl, 0, C);
+ sub_ddmmss (nh, nl, uh, ul, 0, A);
+ q1 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ sub_ddmmss (dh, dl, vh, vl, 0, D);
+ if ((dl | dh) == 0)
+ break;
+ add_ssaaaa (nh, nl, uh, ul, 0, B);
+ q2 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ if (q1 != q2)
+ break;
+
+ asign = ~asign;
+
+ T = A + q1 * C;
+ A = C;
+ C = T;
+ T = B + q1 * D;
+ B = D;
+ D = T;
+ umul_ppmm (t1, t0, q1, vl);
+ t1 += q1 * vh;
+ sub_ddmmss (Th, Tl, uh, ul, t1, t0);
+ uh = vh, ul = vl;
+ vh = Th, vl = Tl;
+ }
+#if EXTEND
+ if (asign)
+ sign = -sign;
+#endif
+ }
+ else /* Same, but using single-limb calculations. */
+ {
+ mp_limb_t uh, vh;
+ /* Make UH be the most significant limb of U, and make VH be
+ corresponding bits from V. */
+ uh = up[size - 1];
+ vh = vp[size - 1];
+ count_leading_zeros (cnt, uh);
+ if (cnt != 0)
+ {
+ uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+ vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+ }
+
+ A = 1;
+ B = 0;
+ C = 0;
+ D = 1;
+
+ asign = 0;
+ for (;;)
+ {
+ mp_limb_t q, T;
+ if (vh - C == 0 || vh + D == 0)
+ break;
+
+ q = (uh + A) / (vh - C);
+ if (q != (uh - B) / (vh + D))
+ break;
+
+ asign = ~asign;
+
+ T = A + q * C;
+ A = C;
+ C = T;
+ T = B + q * D;
+ B = D;
+ D = T;
+ T = uh - q * vh;
+ uh = vh;
+ vh = T;
+
+ if (vh - D == 0)
+ break;
+
+ q = (uh - A) / (vh + C);
+ if (q != (uh + B) / (vh - D))
+ break;
+
+ asign = ~asign;
+
+ T = A + q * C;
+ A = C;
+ C = T;
+ T = B + q * D;
+ B = D;
+ D = T;
+ T = uh - q * vh;
+ uh = vh;
+ vh = T;
+ }
+#if EXTEND
+ if (asign)
+ sign = -sign;
+#endif
+ }
+
+#if RECORD
+ max = MAX (A, max); max = MAX (B, max);
+ max = MAX (C, max); max = MAX (D, max);
+#endif
+
+ if (B == 0)
+ {
+ mp_limb_t qh;
+ mp_size_t i;
+ /* This is quite rare. I.e., optimize something else! */
+
+ /* Normalize V (and shift up U the same amount). */
+ count_leading_zeros (cnt, vp[vsize - 1]);
+ if (cnt != 0)
+ {
+ mp_limb_t cy;
+ mpn_lshift (vp, vp, vsize, cnt);
+ cy = mpn_lshift (up, up, size, cnt);
+ up[size] = cy;
+ size += cy != 0;
+ }
+
+ qh = mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+ MPN_COPY (tp, s0p, ssize);
+ {
+ mp_size_t qsize;
+
+ qsize = size - vsize; /* size of stored quotient from division */
+ if (ssize < qsize)
+ {
+ MPN_ZERO (tp + ssize, qsize - ssize);
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < ssize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, up + vsize, qsize, s1p[i]);
+ tp[qsize + i] = cy;
+ }
+ if (qh != 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
+ if (cy != 0)
+ abort ();
+ }
+ }
+ else
+ {
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < qsize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
+ tp[ssize + i] = cy;
+ }
+ if (qh != 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
+ if (cy != 0)
+ {
+ tp[qsize + ssize] = cy;
+ s1p[qsize + ssize] = 0;
+ ssize++;
+ }
+ }
+ }
+ ssize += qsize;
+ ssize -= tp[ssize - 1] == 0;
+ }
+
+ sign = -sign;
+ MP_PTR_SWAP (s0p, s1p);
+ MP_PTR_SWAP (s1p, tp);
+#endif
+ size = vsize;
+ if (cnt != 0)
+ {
+ mpn_rshift (up, up, size, cnt);
+ mpn_rshift (vp, vp, size, cnt);
+ }
+ MP_PTR_SWAP (up, vp);
+ }
+ else
+ {
+#if EXTEND
+ mp_size_t tsize, wsize;
+#endif
+ /* T = U*A + V*B
+ W = U*C + V*D
+ U = T
+ V = W */
+
+#if STAT
+ { mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x);
+ arr[BITS_PER_MP_LIMB - cnt]++; }
+#endif
+ if (A == 0)
+ {
+ /* B == 1 and C == 1 (D is arbitrary) */
+ mp_limb_t cy;
+ MPN_COPY (tp, vp, size);
+ MPN_COPY (wp, up, size);
+ mpn_submul_1 (wp, vp, size, D);
+ MP_PTR_SWAP (tp, up);
+ MP_PTR_SWAP (wp, vp);
+#if EXTEND
+ MPN_COPY (tp, s1p, ssize);
+ tsize = ssize;
+ tp[ssize] = 0; /* must zero since wp might spill below */
+ MPN_COPY (wp, s0p, ssize);
+ cy = mpn_addmul_1 (wp, s1p, ssize, D);
+ wp[ssize] = cy;
+ wsize = ssize + (cy != 0);
+ MP_PTR_SWAP (tp, s0p);
+ MP_PTR_SWAP (wp, s1p);
+ ssize = MAX (wsize, tsize);
+#endif
+ }
+ else
+ {
+ if (asign)
+ {
+ mp_limb_t cy;
+ mpn_mul_1 (tp, vp, size, B);
+ mpn_submul_1 (tp, up, size, A);
+ mpn_mul_1 (wp, up, size, C);
+ mpn_submul_1 (wp, vp, size, D);
+ MP_PTR_SWAP (tp, up);
+ MP_PTR_SWAP (wp, vp);
+#if EXTEND
+ cy = mpn_mul_1 (tp, s1p, ssize, B);
+ cy += mpn_addmul_1 (tp, s0p, ssize, A);
+ tp[ssize] = cy;
+ tsize = ssize + (cy != 0);
+ cy = mpn_mul_1 (wp, s0p, ssize, C);
+ cy += mpn_addmul_1 (wp, s1p, ssize, D);
+ wp[ssize] = cy;
+ wsize = ssize + (cy != 0);
+ MP_PTR_SWAP (tp, s0p);
+ MP_PTR_SWAP (wp, s1p);
+ ssize = MAX (wsize, tsize);
+#endif
+ }
+ else
+ {
+ mp_limb_t cy;
+ mpn_mul_1 (tp, up, size, A);
+ mpn_submul_1 (tp, vp, size, B);
+ mpn_mul_1 (wp, vp, size, D);
+ mpn_submul_1 (wp, up, size, C);
+ MP_PTR_SWAP (tp, up);
+ MP_PTR_SWAP (wp, vp);
+#if EXTEND
+ cy = mpn_mul_1 (tp, s0p, ssize, A);
+ cy += mpn_addmul_1 (tp, s1p, ssize, B);
+ tp[ssize] = cy;
+ tsize = ssize + (cy != 0);
+ cy = mpn_mul_1 (wp, s1p, ssize, D);
+ cy += mpn_addmul_1 (wp, s0p, ssize, C);
+ wp[ssize] = cy;
+ wsize = ssize + (cy != 0);
+ MP_PTR_SWAP (tp, s0p);
+ MP_PTR_SWAP (wp, s1p);
+ ssize = MAX (wsize, tsize);
+#endif
+ }
+ }
+
+ size -= up[size - 1] == 0;
+ }
+ }
+
+#if RECORD
+ printf ("max: %lx\n", max);
+#endif
+
+#if STAT
+ {int i; for (i = 0; i < BITS_PER_MP_LIMB; i++) printf ("%d:%d\n", i, arr[i]);}
+#endif
+
+ if (vsize == 0)
+ {
+ if (gp != up && gp != 0)
+ MPN_COPY (gp, up, size);
+#if EXTEND
+ MPN_NORMALIZE (s0p, ssize);
+ if (orig_s0p != s0p)
+ MPN_COPY (orig_s0p, s0p, ssize);
+ *s0size = sign >= 0 ? ssize : -ssize;
+#endif
+ TMP_FREE (mark);
+ return size;
+ }
+ else
+ {
+ mp_limb_t vl, ul, t;
+#if EXTEND
+ mp_size_t qsize, i;
+#endif
+ vl = vp[0];
+#if EXTEND
+ t = mpn_divmod_1 (wp, up, size, vl);
+
+ MPN_COPY (tp, s0p, ssize);
+
+ qsize = size - (wp[size - 1] == 0); /* size of quotient from division */
+ if (ssize < qsize)
+ {
+ MPN_ZERO (tp + ssize, qsize - ssize);
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < ssize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]);
+ tp[qsize + i] = cy;
+ }
+ }
+ else
+ {
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < qsize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
+ tp[ssize + i] = cy;
+ }
+ }
+ ssize += qsize;
+ ssize -= tp[ssize - 1] == 0;
+
+ sign = -sign;
+ MP_PTR_SWAP (s0p, s1p);
+ MP_PTR_SWAP (s1p, tp);
+#else
+ t = mpn_mod_1 (up, size, vl);
+#endif
+ ul = vl;
+ vl = t;
+ while (vl != 0)
+ {
+ mp_limb_t t;
+#if EXTEND
+ mp_limb_t q;
+ q = ul / vl;
+ t = ul - q * vl;
+
+ MPN_COPY (tp, s0p, ssize);
+
+ MPN_ZERO (s1p + ssize, 1); /* zero s1 too */
+
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp, s1p, ssize, q);
+ tp[ssize] = cy;
+ }
+
+ ssize += 1;
+ ssize -= tp[ssize - 1] == 0;
+
+ sign = -sign;
+ MP_PTR_SWAP (s0p, s1p);
+ MP_PTR_SWAP (s1p, tp);
+#else
+ t = ul % vl;
+#endif
+ ul = vl;
+ vl = t;
+ }
+ if (gp != 0)
+ gp[0] = ul;
+#if EXTEND
+ MPN_NORMALIZE (s0p, ssize);
+ if (orig_s0p != s0p)
+ MPN_COPY (orig_s0p, s0p, ssize);
+ *s0size = sign >= 0 ? ssize : -ssize;
+#endif
+ TMP_FREE (mark);
+ return 1;
+ }
+}
diff --git a/rts/gmp/mpn/generic/get_str.c b/rts/gmp/mpn/generic/get_str.c
new file mode 100644
index 0000000000..a713b61825
--- /dev/null
+++ b/rts/gmp/mpn/generic/get_str.c
@@ -0,0 +1,216 @@
+/* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR
+ to a printable string in STR in base BASE.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Convert the limb vector pointed to by MPTR and MSIZE long to a
+ char array, using base BASE for the result array. Store the
+ result in the character array STR. STR must point to an array with
+ space for the largest possible number represented by a MSIZE long
+ limb vector + 1 extra character.
+
+ The result is NOT in Ascii, to convert it to printable format, add
+ '0' or 'A' depending on the base and range.
+
+ Return the number of digits in the result string.
+ This may include some leading zeros.
+
+ The limb vector pointed to by MPTR is clobbered. */
+
+size_t
+#if __STDC__
+mpn_get_str (unsigned char *str, int base, mp_ptr mptr, mp_size_t msize)
+#else
+mpn_get_str (str, base, mptr, msize)
+ unsigned char *str;
+ int base;
+ mp_ptr mptr;
+ mp_size_t msize;
+#endif
+{
+ mp_limb_t big_base;
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+ int normalization_steps;
+#endif
+#if UDIV_TIME > 2 * UMUL_TIME
+ mp_limb_t big_base_inverted;
+#endif
+ unsigned int dig_per_u;
+ mp_size_t out_len;
+ register unsigned char *s;
+
+ big_base = __mp_bases[base].big_base;
+
+ s = str;
+
+ /* Special case zero, as the code below doesn't handle it. */
+ if (msize == 0)
+ {
+ s[0] = 0;
+ return 1;
+ }
+
+ if ((base & (base - 1)) == 0)
+ {
+ /* The base is a power of 2. Make conversion from most
+ significant side. */
+ mp_limb_t n1, n0;
+ register int bits_per_digit = big_base;
+ register int x;
+ register int bit_pos;
+ register int i;
+
+ n1 = mptr[msize - 1];
+ count_leading_zeros (x, n1);
+
+ /* BIT_POS should be R when input ends in least sign. nibble,
+ R + bits_per_digit * n when input ends in n:th least significant
+ nibble. */
+
+ {
+ int bits;
+
+ bits = BITS_PER_MP_LIMB * msize - x;
+ x = bits % bits_per_digit;
+ if (x != 0)
+ bits += bits_per_digit - x;
+ bit_pos = bits - (msize - 1) * BITS_PER_MP_LIMB;
+ }
+
+ /* Fast loop for bit output. */
+ i = msize - 1;
+ for (;;)
+ {
+ bit_pos -= bits_per_digit;
+ while (bit_pos >= 0)
+ {
+ *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+ bit_pos -= bits_per_digit;
+ }
+ i--;
+ if (i < 0)
+ break;
+ n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+ n1 = mptr[i];
+ bit_pos += BITS_PER_MP_LIMB;
+ *s++ = n0 | (n1 >> bit_pos);
+ }
+
+ *s = 0;
+
+ return s - str;
+ }
+ else
+ {
+ /* General case. The base is not a power of 2. Make conversion
+ from least significant end. */
+
+ /* If udiv_qrnnd only handles divisors with the most significant bit
+ set, prepare BIG_BASE for being a divisor by shifting it to the
+ left exactly enough to set the most significant bit. */
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+ count_leading_zeros (normalization_steps, big_base);
+ big_base <<= normalization_steps;
+#if UDIV_TIME > 2 * UMUL_TIME
+ /* Get the fixed-point approximation to 1/(BIG_BASE << NORMALIZATION_STEPS). */
+ big_base_inverted = __mp_bases[base].big_base_inverted;
+#endif
+#endif
+
+ dig_per_u = __mp_bases[base].chars_per_limb;
+ out_len = ((size_t) msize * BITS_PER_MP_LIMB
+ * __mp_bases[base].chars_per_bit_exactly) + 1;
+ s += out_len;
+
+ while (msize != 0)
+ {
+ int i;
+ mp_limb_t n0, n1;
+
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+ /* If we shifted BIG_BASE above, shift the dividend too, to get
+ the right quotient. We need to do this every loop,
+ since the intermediate quotients are OK, but the quotient from
+ one turn in the loop is going to be the dividend in the
+ next turn, and the dividend needs to be up-shifted. */
+ if (normalization_steps != 0)
+ {
+ n0 = mpn_lshift (mptr, mptr, msize, normalization_steps);
+
+ /* If the shifting gave a carry out limb, store it and
+ increase the length. */
+ if (n0 != 0)
+ {
+ mptr[msize] = n0;
+ msize++;
+ }
+ }
+#endif
+
+ /* Divide the number at TP with BIG_BASE to get a quotient and a
+ remainder. The remainder is our new digit in base BIG_BASE. */
+ i = msize - 1;
+ n1 = mptr[i];
+
+ if (n1 >= big_base)
+ n1 = 0;
+ else
+ {
+ msize--;
+ i--;
+ }
+
+ for (; i >= 0; i--)
+ {
+ n0 = mptr[i];
+#if UDIV_TIME > 2 * UMUL_TIME
+ udiv_qrnnd_preinv (mptr[i], n1, n1, n0, big_base, big_base_inverted);
+#else
+ udiv_qrnnd (mptr[i], n1, n1, n0, big_base);
+#endif
+ }
+
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+ /* If we shifted above (at previous UDIV_NEEDS_NORMALIZATION tests)
+ the remainder will be up-shifted here. Compensate. */
+ n1 >>= normalization_steps;
+#endif
+
+ /* Convert N1 from BIG_BASE to a string of digits in BASE
+ using single precision operations. */
+ for (i = dig_per_u - 1; i >= 0; i--)
+ {
+ *--s = n1 % base;
+ n1 /= base;
+ if (n1 == 0 && msize == 0)
+ break;
+ }
+ }
+
+ while (s != str)
+ *--s = 0;
+ return out_len;
+ }
+}
diff --git a/rts/gmp/mpn/generic/gmp-mparam.h b/rts/gmp/mpn/generic/gmp-mparam.h
new file mode 100644
index 0000000000..14bcaece83
--- /dev/null
+++ b/rts/gmp/mpn/generic/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/rts/gmp/mpn/generic/hamdist.c b/rts/gmp/mpn/generic/hamdist.c
new file mode 100644
index 0000000000..35c10e8450
--- /dev/null
+++ b/rts/gmp/mpn/generic/hamdist.c
@@ -0,0 +1,94 @@
+/* mpn_hamdist --
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined __GNUC__
+/* No processor claiming to be SPARC v9 compliant seem to
+ implement the POPC instruction. Disable pattern for now. */
+#if 0 && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
+#define popc_limb(a) \
+ ({ \
+ DItype __res; \
+ asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \
+ __res; \
+ })
+#endif
+#endif
+
+#ifndef popc_limb
+
+/* Cool population count of a mp_limb_t.
+ You have to figure out how this works, I won't tell you! */
+
+static inline unsigned int
+#if __STDC__
+popc_limb (mp_limb_t x)
+#else
+popc_limb (x)
+ mp_limb_t x;
+#endif
+{
+#if BITS_PER_MP_LIMB == 64
+ /* We have to go into some trouble to define these constants.
+ (For mp_limb_t being `long long'.) */
+ mp_limb_t cnst;
+ cnst = 0xaaaaaaaaL | ((mp_limb_t) 0xaaaaaaaaL << BITS_PER_MP_LIMB/2);
+ x -= (x & cnst) >> 1;
+ cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2);
+ x = ((x & ~cnst) >> 2) + (x & cnst);
+ cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2);
+ x = ((x >> 4) + x) & cnst;
+ x = ((x >> 8) + x);
+ x = ((x >> 16) + x);
+ x = ((x >> 32) + x) & 0xff;
+#endif
+#if BITS_PER_MP_LIMB == 32
+ x -= (x & 0xaaaaaaaa) >> 1;
+ x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L);
+ x = ((x >> 4) + x) & 0x0f0f0f0fL;
+ x = ((x >> 8) + x);
+ x = ((x >> 16) + x) & 0xff;
+#endif
+ return x;
+}
+#endif
+
+unsigned long int
+#if __STDC__
+mpn_hamdist (mp_srcptr up, mp_srcptr vp, mp_size_t size)
+#else
+mpn_hamdist (up, vp, size)
+ register mp_srcptr up;
+ register mp_srcptr vp;
+ register mp_size_t size;
+#endif
+{
+ unsigned long int hamdist;
+ mp_size_t i;
+
+ hamdist = 0;
+ for (i = 0; i < size; i++)
+ hamdist += popc_limb (up[i] ^ vp[i]);
+
+ return hamdist;
+}
diff --git a/rts/gmp/mpn/generic/inlines.c b/rts/gmp/mpn/generic/inlines.c
new file mode 100644
index 0000000000..9487e58cf2
--- /dev/null
+++ b/rts/gmp/mpn/generic/inlines.c
@@ -0,0 +1,24 @@
+/*
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#define _FORCE_INLINES
+#define _EXTERN_INLINE /* empty */
+#include "gmp.h"
diff --git a/rts/gmp/mpn/generic/jacbase.c b/rts/gmp/mpn/generic/jacbase.c
new file mode 100644
index 0000000000..dd437f1ac1
--- /dev/null
+++ b/rts/gmp/mpn/generic/jacbase.c
@@ -0,0 +1,136 @@
+/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
+
+ THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
+ INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if COUNT_TRAILING_ZEROS_TIME <= 7
+/* If count_trailing_zeros is fast, use it.
+ K7 at 7 cycles and P6 at 2 are good here. K6 at 12-27 and P5 at 18-42
+ are not. The default 15 in longlong.h is meant to mean not good here. */
+
+#define PROCESS_TWOS_ANY \
+ { \
+ mp_limb_t twos; \
+ count_trailing_zeros (twos, a); \
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b); \
+ a >>= twos; \
+ }
+
+#define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY
+
+#else
+/* Use a loop instead. With "a" uniformly distributed there will usually be
+ only a few trailing zeros.
+
+ Unfortunately the branch for the while loop here will be on a 50/50
+ chance of a 1 or 0, which is bad for branch prediction. */
+
+#define PROCESS_TWOS_EVEN \
+ { \
+ int two; \
+ two = JACOBI_TWO_U_BIT1 (b); \
+ do \
+ { \
+ a >>= 1; \
+ result_bit1 ^= two; \
+ ASSERT (a != 0); \
+ } \
+ while ((a & 1) == 0); \
+ }
+
+#define PROCESS_TWOS_ANY \
+ if ((a & 1) == 0) \
+ PROCESS_TWOS_EVEN;
+
+#endif
+
+
+/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
+ with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
+
+ The initial result_bit1 is taken as a parameter for the convenience of
+ mpz_kronecker_zi_ui() et al. The sign changes both here and in those
+ routines accumulate nicely in bit 1, see the JACOBI macros.
+
+ The return value here is the normal +1, 0, or -1. Note that +1 and -1
+ have bit 1 in the "BIT1" sense, which could be useful if the caller is
+ accumulating it into some extended calculation.
+
+ Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
+ possible, but a couple of tests suggest it's not a significant speedup,
+ and may even be a slowdown, so what's here is good enough for now.
+
+ Future: The code doesn't demand a<=b actually, so maybe this could be
+ relaxed. All the places this is used currently call with a<=b though. */
+
+int
+#if __STDC__
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
+#else
+mpn_jacobi_base (a, b, result_bit1)
+ mp_limb_t a;
+ mp_limb_t b;
+ int result_bit1;
+#endif
+{
+ ASSERT (b & 1); /* b odd */
+ ASSERT (b != 1);
+ ASSERT (a <= b);
+
+ if (a == 0)
+ return 0;
+
+ PROCESS_TWOS_ANY;
+ if (a == 1)
+ goto done;
+
+ for (;;)
+ {
+ result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
+ MP_LIMB_T_SWAP (a, b);
+
+ do
+ {
+ /* working on (a/b), a,b odd, a>=b */
+ ASSERT (a & 1);
+ ASSERT (b & 1);
+ ASSERT (a >= b);
+
+ if ((a -= b) == 0)
+ return 0;
+
+ PROCESS_TWOS_EVEN;
+ if (a == 1)
+ goto done;
+ }
+ while (a >= b);
+ }
+
+ done:
+ return JACOBI_BIT1_TO_PN (result_bit1);
+}
diff --git a/rts/gmp/mpn/generic/lshift.c b/rts/gmp/mpn/generic/lshift.c
new file mode 100644
index 0000000000..0b58389658
--- /dev/null
+++ b/rts/gmp/mpn/generic/lshift.c
@@ -0,0 +1,87 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ and store the USIZE least significant digits of the result at WP.
+ Return the bits shifted out from the most significant digit.
+
+ Argument constraints:
+ 1. 0 < CNT < BITS_PER_MP_LIMB
+ 2. If the result is to be written over the input, WP must be >= UP.
+*/
+
+mp_limb_t
+#if __STDC__
+mpn_lshift (register mp_ptr wp,
+ register mp_srcptr up, mp_size_t usize,
+ register unsigned int cnt)
+#else
+mpn_lshift (wp, up, usize, cnt)
+ register mp_ptr wp;
+ register mp_srcptr up;
+ mp_size_t usize;
+ register unsigned int cnt;
+#endif
+{
+ register mp_limb_t high_limb, low_limb;
+ register unsigned sh_1, sh_2;
+ register mp_size_t i;
+ mp_limb_t retval;
+
+#ifdef DEBUG
+ if (usize == 0 || cnt == 0)
+ abort ();
+#endif
+
+ sh_1 = cnt;
+#if 0
+ if (sh_1 == 0)
+ {
+ if (wp != up)
+ {
+ /* Copy from high end to low end, to allow specified input/output
+ overlapping. */
+ for (i = usize - 1; i >= 0; i--)
+ wp[i] = up[i];
+ }
+ return 0;
+ }
+#endif
+
+ wp += 1;
+ sh_2 = BITS_PER_MP_LIMB - sh_1;
+ i = usize - 1;
+ low_limb = up[i];
+ retval = low_limb >> sh_2;
+ high_limb = low_limb;
+ while (--i >= 0)
+ {
+ low_limb = up[i];
+ wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+ high_limb = low_limb;
+ }
+ wp[i] = high_limb << sh_1;
+
+ return retval;
+}
diff --git a/rts/gmp/mpn/generic/mod_1.c b/rts/gmp/mpn/generic/mod_1.c
new file mode 100644
index 0000000000..168ec9df49
--- /dev/null
+++ b/rts/gmp/mpn/generic/mod_1.c
@@ -0,0 +1,175 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+Copyright (C) 1991, 1993, 1994, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+mp_limb_t
+#if __STDC__
+mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb_t divisor_limb)
+#else
+mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
+ mp_srcptr dividend_ptr;
+ mp_size_t dividend_size;
+ mp_limb_t divisor_limb;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t n1, n0, r;
+ int dummy;
+
+ /* Botch: Should this be handled at all? Rely on callers? */
+ if (dividend_size == 0)
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ dividend is large, pre-invert the divisor, and use
+ only multiplications in the inner loop. */
+
+ /* This test should be read:
+ Does it ever help to use udiv_qrnnd_preinv?
+ && Does what we save compensate for the inversion overhead? */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if (normalization_steps != 0)
+ {
+ mp_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+ invert_limb (divisor_limb_inverted, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ if (r == 0
+ && divisor_limb > ((n1 << normalization_steps)
+ | (dividend_ptr[dividend_size - 2] >> ...)))
+ ...one division less... */
+
+ for (i = dividend_size - 2; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (dummy, r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+ divisor_limb, divisor_limb_inverted);
+ n1 = n0;
+ }
+ udiv_qrnnd_preinv (dummy, r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ }
+ else
+ {
+ mp_limb_t divisor_limb_inverted;
+
+ invert_limb (divisor_limb_inverted, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (dummy, r, r,
+ n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+ }
+ }
+ else
+ {
+ if (UDIV_NEEDS_NORMALIZATION)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if (normalization_steps != 0)
+ {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ if (r == 0
+ && divisor_limb > ((n1 << normalization_steps)
+ | (dividend_ptr[dividend_size - 2] >> ...)))
+ ...one division less... */
+
+ for (i = dividend_size - 2; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (dummy, r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd (dummy, r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ it, or because DIVISOR_LIMB is already normalized. */
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (dummy, r, r, n0, divisor_limb);
+ }
+ return r;
+ }
+}
diff --git a/rts/gmp/mpn/generic/mod_1_rs.c b/rts/gmp/mpn/generic/mod_1_rs.c
new file mode 100644
index 0000000000..62aaa94b92
--- /dev/null
+++ b/rts/gmp/mpn/generic/mod_1_rs.c
@@ -0,0 +1,111 @@
+/* mpn_mod_1_rshift -- mpn remainder under hypothetical right shift.
+
+ THE FUNCTION IN THIS FILE IS FOR INTERNAL USE AND HAS A MUTABLE
+ INTERFACE. IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+ IT'S ALMOST GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* When testing on a CPU with UDIV_NEEDS_NORMALIZATION equal to 0, it can be
+ changed to 1 temporarily to test the code under that case too. */
+#if 0
+#undef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 1
+#endif
+
+
+/* Calculate the remainder "(ptr,size >> shift) % divisor". Note ptr,size
+ is unchanged, the shift is only for its effect on the remainder.
+ The shift doesn't even need to be considered until the last limb.
+
+ This function has the normal size!=0 restriction, unlike the basic
+ mpn_mod_1. */
+
+mp_limb_t
+#if __STDC__
+mpn_mod_1_rshift (mp_srcptr ptr, mp_size_t size, unsigned shift,
+ mp_limb_t divisor)
+#else
+mpn_mod_1_rshift (ptr, size, shift, divisor)
+ mp_srcptr ptr;
+ mp_size_t size;
+ unsigned shift;
+ mp_limb_t divisor;
+#endif
+{
+ mp_limb_t quot, rem;
+
+ ASSERT (shift >= 1);
+ ASSERT (shift < BITS_PER_MP_LIMB);
+ ASSERT (size >= 1);
+
+ if (size == 1)
+ return (ptr[0] >> shift) % divisor;
+
+#if UDIV_NEEDS_NORMALIZATION
+ {
+ int norm;
+ int delta;
+
+ count_leading_zeros (norm, divisor);
+ divisor <<= norm;
+
+ delta = shift - norm;
+ if (delta == 0)
+ return mpn_mod_1 (ptr, size, divisor) >> norm;
+
+ if (delta > 0)
+ {
+ rem = mpn_mod_1 (ptr+1, size-1, divisor);
+ udiv_qrnnd (quot, rem,
+ rem >> delta,
+ (rem << (BITS_PER_MP_LIMB-delta)) | (ptr[0] >> delta),
+ divisor);
+ return rem >> norm;
+ }
+ else
+ {
+ rem = mpn_mod_1 (ptr, size, divisor);
+ udiv_qrnnd (quot, rem,
+ rem >> (BITS_PER_MP_LIMB+delta),
+ rem << -delta,
+ divisor);
+ return rem >> norm;
+ }
+ }
+
+#else /* !UDIV_NEEDS_NORMALIZATION */
+
+ rem = mpn_mod_1 (ptr+1, size-1, divisor);
+ udiv_qrnnd (quot, rem,
+ rem >> shift,
+ (rem << (BITS_PER_MP_LIMB-shift)) | (ptr[0] >> shift),
+ divisor);
+ return rem;
+
+#endif
+}
diff --git a/rts/gmp/mpn/generic/mul.c b/rts/gmp/mpn/generic/mul.c
new file mode 100644
index 0000000000..cecfa19ca1
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul.c
@@ -0,0 +1,190 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+ THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul)
+ ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH
+ THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
+ (pointed to by VP, with VN limbs), and store the result at PRODP. The
+ result is UN + VN limbs. Return the most significant limb of the result.
+
+ NOTE: The space pointed to by PRODP is overwritten before finished with U
+ and V, so overlap is an error.
+
+ Argument constraints:
+ 1. UN >= VN.
+ 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
+ the multiplier and the multiplicand. */
+
+void
+#if __STDC__
+mpn_sqr_n (mp_ptr prodp,
+ mp_srcptr up, mp_size_t un)
+#else
+mpn_sqr_n (prodp, up, un)
+ mp_ptr prodp;
+ mp_srcptr up;
+ mp_size_t un;
+#endif
+{
+ if (un < KARATSUBA_SQR_THRESHOLD)
+ { /* plain schoolbook multiplication */
+ if (un == 0)
+ return;
+ mpn_sqr_basecase (prodp, up, un);
+ }
+ else if (un < TOOM3_SQR_THRESHOLD)
+ { /* karatsuba multiplication */
+ mp_ptr tspace;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
+ mpn_kara_sqr_n (prodp, up, un, tspace);
+ TMP_FREE (marker);
+ }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else if (un < FFT_SQR_THRESHOLD)
+#else
+ else
+#endif
+ { /* toom3 multiplication */
+ mp_ptr tspace;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
+ mpn_toom3_sqr_n (prodp, up, un, tspace);
+ TMP_FREE (marker);
+ }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else
+ {
+ /* schoenhage multiplication */
+ mpn_mul_fft_full (prodp, up, un, up, un);
+ }
+#endif
+}
+
+mp_limb_t
+#if __STDC__
+mpn_mul (mp_ptr prodp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+#else
+mpn_mul (prodp, up, un, vp, vn)
+ mp_ptr prodp;
+ mp_srcptr up;
+ mp_size_t un;
+ mp_srcptr vp;
+ mp_size_t vn;
+#endif
+{
+ mp_size_t l;
+ mp_limb_t c;
+
+ if (up == vp && un == vn)
+ {
+ mpn_sqr_n (prodp, up, un);
+ return prodp[2 * un - 1];
+ }
+
+ if (vn < KARATSUBA_MUL_THRESHOLD)
+ { /* long multiplication */
+ mpn_mul_basecase (prodp, up, un, vp, vn);
+ return prodp[un + vn - 1];
+ }
+
+ mpn_mul_n (prodp, up, vp, vn);
+ if (un != vn)
+ { mp_limb_t t;
+ mp_ptr ws;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ prodp += vn;
+ l = vn;
+ up += vn;
+ un -= vn;
+
+ if (un < vn)
+ {
+ /* Swap u's and v's. */
+ MPN_SRCPTR_SWAP (up,un, vp,vn);
+ }
+
+ ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ? vn : un) + vn)
+ * BYTES_PER_MP_LIMB);
+
+ t = 0;
+ while (vn >= KARATSUBA_MUL_THRESHOLD)
+ {
+ mpn_mul_n (ws, up, vp, vn);
+ if (l <= 2*vn)
+ {
+ t += mpn_add_n (prodp, prodp, ws, l);
+ if (l != 2*vn)
+ {
+ t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);
+ l = 2*vn;
+ }
+ }
+ else
+ {
+ c = mpn_add_n (prodp, prodp, ws, 2*vn);
+ t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);
+ }
+ prodp += vn;
+ l -= vn;
+ up += vn;
+ un -= vn;
+ if (un < vn)
+ {
+ /* Swap u's and v's. */
+ MPN_SRCPTR_SWAP (up,un, vp,vn);
+ }
+ }
+
+ if (vn)
+ {
+ mpn_mul_basecase (ws, up, un, vp, vn);
+ if (l <= un + vn)
+ {
+ t += mpn_add_n (prodp, prodp, ws, l);
+ if (l != un + vn)
+ t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);
+ }
+ else
+ {
+ c = mpn_add_n (prodp, prodp, ws, un + vn);
+ t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);
+ }
+ }
+
+ TMP_FREE (marker);
+ }
+ return prodp[un + vn - 1];
+}
diff --git a/rts/gmp/mpn/generic/mul_1.c b/rts/gmp/mpn/generic/mul_1.c
new file mode 100644
index 0000000000..1c36b5fb1f
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_1.c
@@ -0,0 +1,59 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and
+ store the product in a second limb vector.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t cy_limb;
+ register mp_size_t j;
+ register mp_limb_t prod_high, prod_low;
+
+ /* The loop counter and index J goes from -S1_SIZE to -1. This way
+ the loop becomes faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ res_ptr -= j;
+
+ cy_limb = 0;
+ do
+ {
+ umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb) + prod_high;
+
+ res_ptr[j] = prod_low;
+ }
+ while (++j != 0);
+
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/mul_basecase.c b/rts/gmp/mpn/generic/mul_basecase.c
new file mode 100644
index 0000000000..00c06aa5c4
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_basecase.c
@@ -0,0 +1,87 @@
+/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
+ of length m and n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Handle simple cases with traditional multiplication.
+
+ This is the most critical code of multiplication. All multiplies rely on
+ this, both small and huge. Small ones arrive here immediately, huge ones
+ arrive here as this is the base case for Karatsuba's recursive algorithm. */
+
+void
+#if __STDC__
+mpn_mul_basecase (mp_ptr prodp,
+ mp_srcptr up, mp_size_t usize,
+ mp_srcptr vp, mp_size_t vsize)
+#else
+mpn_mul_basecase (prodp, up, usize, vp, vsize)
+ mp_ptr prodp;
+ mp_srcptr up;
+ mp_size_t usize;
+ mp_srcptr vp;
+ mp_size_t vsize;
+#endif
+{
+ /* We first multiply by the low order one or two limbs, as the result can
+ be stored, not added, to PROD. We also avoid a loop for zeroing this
+ way. */
+#if HAVE_NATIVE_mpn_mul_2
+ if (vsize >= 2)
+ {
+ prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp[0], vp[1]);
+ prodp += 2, vp += 2, vsize -= 2;
+ }
+ else
+ {
+ prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
+ return;
+ }
+#else
+ prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
+ prodp += 1, vp += 1, vsize -= 1;
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+ while (vsize >= 2)
+ {
+ prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]);
+ prodp += 2, vp += 2, vsize -= 2;
+ }
+ if (vsize != 0)
+ prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
+#else
+ /* For each iteration in the loop, multiply U with one limb from V, and
+ add the result to PROD. */
+ while (vsize != 0)
+ {
+ prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
+ prodp += 1, vp += 1, vsize -= 1;
+ }
+#endif
+}
diff --git a/rts/gmp/mpn/generic/mul_fft.c b/rts/gmp/mpn/generic/mul_fft.c
new file mode 100644
index 0000000000..00fd6d72de
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_fft.c
@@ -0,0 +1,772 @@
+/* An implementation in GMP of Scho"nhage's fast multiplication algorithm
+ modulo 2^N+1, by Paul Zimmermann, INRIA Lorraine, February 1998.
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND THE FUNCTIONS HAVE
+ MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED
+ INTERFACES. IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN
+ A FUTURE GNU MP RELEASE.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+/* References:
+
+ Schnelle Multiplikation grosser Zahlen, by Arnold Scho"nhage and Volker
+ Strassen, Computing 7, p. 281-292, 1971.
+
+ Asymptotically fast algorithms for the numerical multiplication
+ and division of polynomials with complex coefficients, by Arnold Scho"nhage,
+ Computer Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+
+ Tapes versus Pointers, a study in implementing fast algorithms,
+ by Arnold Scho"nhage, Bulletin of the EATCS, 30, p. 23-32, 1986.
+
+ See also http://www.loria.fr/~zimmerma/bignum
+
+
+ Future:
+
+ K==2 isn't needed in the current uses of this code and the bits specific
+ for that could be dropped.
+
+ It might be possible to avoid a small number of MPN_COPYs by using a
+ rotating temporary or two.
+
+ Multiplications of unequal sized operands can be done with this code, but
+ it needs a tighter test for identifying squaring (same sizes as well as
+ same pointers). */
+
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+
+FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] = {
+ FFT_MUL_TABLE,
+ FFT_SQR_TABLE
+};
+
+
+static void mpn_mul_fft_internal
+_PROTO ((mp_limb_t *op, mp_srcptr n, mp_srcptr m, mp_size_t pl,
+ int k, int K,
+ mp_limb_t **Ap, mp_limb_t **Bp,
+ mp_limb_t *A, mp_limb_t *B,
+ mp_size_t nprime, mp_size_t l, mp_size_t Mp, int **_fft_l,
+ mp_limb_t *T, int rec));
+
+
+/* Find the best k to use for a mod 2^(n*BITS_PER_MP_LIMB)+1 FFT.
+ sqr==0 if for a multiply, sqr==1 for a square */
+int
+#if __STDC__
+mpn_fft_best_k (mp_size_t n, int sqr)
+#else
+mpn_fft_best_k (n, sqr)
+ mp_size_t n;
+ int sqr;
+#endif
+{
+ mp_size_t t;
+ int i;
+
+ for (i = 0; mpn_fft_table[sqr][i] != 0; i++)
+ if (n < mpn_fft_table[sqr][i])
+ return i + FFT_FIRST_K;
+
+ /* treat 4*last as one further entry */
+ if (i == 0 || n < 4*mpn_fft_table[sqr][i-1])
+ return i + FFT_FIRST_K;
+ else
+ return i + FFT_FIRST_K + 1;
+}
+
+
+/* Returns smallest possible number of limbs >= pl for a fft of size 2^k.
+ FIXME: Is this simply pl rounded up to the next multiple of 2^k ? */
+
+mp_size_t
+#if __STDC__
+mpn_fft_next_size (mp_size_t pl, int k)
+#else
+mpn_fft_next_size (pl, k)
+ mp_size_t pl;
+ int k;
+#endif
+{
+ mp_size_t N, M;
+ int K;
+
+ /* if (k==0) k = mpn_fft_best_k (pl, sqr); */
+ N = pl*BITS_PER_MP_LIMB;
+ K = 1<<k;
+ if (N%K) N=(N/K+1)*K;
+ M = N/K;
+ if (M%BITS_PER_MP_LIMB) N=((M/BITS_PER_MP_LIMB)+1)*BITS_PER_MP_LIMB*K;
+ return (N/BITS_PER_MP_LIMB);
+}
+
+
+static void
+#if __STDC__
+mpn_fft_initl(int **l, int k)
+#else
+mpn_fft_initl(l, k)
+ int **l;
+ int k;
+#endif
+{
+ int i,j,K;
+
+ l[0][0] = 0;
+ for (i=1,K=2;i<=k;i++,K*=2) {
+ for (j=0;j<K/2;j++) {
+ l[i][j] = 2*l[i-1][j];
+ l[i][K/2+j] = 1+l[i][j];
+ }
+ }
+}
+
+
+/* a <- -a mod 2^(n*BITS_PER_MP_LIMB)+1 */
+static void
+#if __STDC__
+mpn_fft_neg_modF(mp_limb_t *ap, mp_size_t n)
+#else
+mpn_fft_neg_modF(ap, n)
+ mp_limb_t *ap;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t c;
+
+ c = ap[n]+2;
+ mpn_com_n (ap, ap, n);
+ ap[n]=0; mpn_incr_u(ap, c);
+}
+
+
+/* a <- a*2^e mod 2^(n*BITS_PER_MP_LIMB)+1 */
+static void
+#if __STDC__
+mpn_fft_mul_2exp_modF(mp_limb_t *ap, int e, mp_size_t n, mp_limb_t *tp)
+#else
+mpn_fft_mul_2exp_modF(ap, e, n, tp)
+ mp_limb_t *ap;
+ int e;
+ mp_size_t n;
+ mp_limb_t *tp;
+#endif
+{
+ int d, sh, i; mp_limb_t cc;
+
+ d = e%(n*BITS_PER_MP_LIMB); /* 2^e = (+/-) 2^d */
+ sh = d % BITS_PER_MP_LIMB;
+ if (sh) mpn_lshift(tp, ap, n+1, sh); /* no carry here */
+ else MPN_COPY(tp, ap, n+1);
+ d /= BITS_PER_MP_LIMB; /* now shift of d limbs to the left */
+ if (d) {
+ /* ap[d..n-1] = tp[0..n-d-1], ap[0..d-1] = -tp[n-d..n-1] */
+ /* mpn_xor would be more efficient here */
+ for (i=d-1;i>=0;i--) ap[i] = ~tp[n-d+i];
+ cc = 1-mpn_add_1(ap, ap, d, 1);
+ if (cc) cc=mpn_sub_1(ap+d, tp, n-d, 1);
+ else MPN_COPY(ap+d, tp, n-d);
+ if (cc+=mpn_sub_1(ap+d, ap+d, n-d, tp[n]))
+ ap[n]=mpn_add_1(ap, ap, n, cc);
+ else ap[n]=0;
+ }
+ else if ((ap[n]=mpn_sub_1(ap, tp, n, tp[n]))) {
+ ap[n]=mpn_add_1(ap, ap, n, 1);
+ }
+ if ((e/(n*BITS_PER_MP_LIMB))%2) mpn_fft_neg_modF(ap, n);
+}
+
+
+/* a <- a+b mod 2^(n*BITS_PER_MP_LIMB)+1 */
+static void
+#if __STDC__
+mpn_fft_add_modF (mp_limb_t *ap, mp_limb_t *bp, int n)
+#else
+mpn_fft_add_modF (ap, bp, n)
+ mp_limb_t *ap,*bp;
+ int n;
+#endif
+{
+ mp_limb_t c;
+
+ c = ap[n] + bp[n] + mpn_add_n(ap, ap, bp, n);
+ if (c>1) c -= 1+mpn_sub_1(ap,ap,n,1);
+ ap[n]=c;
+}
+
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+ N=n*BITS_PER_MP_LIMB
+ 2^omega is a primitive root mod 2^N+1
+ output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */
+
+static void
+#if __STDC__
+mpn_fft_fft_sqr (mp_limb_t **Ap, mp_size_t K, int **ll,
+ mp_size_t omega, mp_size_t n, mp_size_t inc, mp_limb_t *tp)
+#else
+mpn_fft_fft_sqr(Ap,K,ll,omega,n,inc,tp)
+mp_limb_t **Ap,*tp;
+mp_size_t K,omega,n,inc;
+int **ll;
+#endif
+{
+ if (K==2) {
+#ifdef ADDSUB
+ if (mpn_addsub_n(Ap[0], Ap[inc], Ap[0], Ap[inc], n+1) & 1)
+#else
+ MPN_COPY(tp, Ap[0], n+1);
+ mpn_add_n(Ap[0], Ap[0], Ap[inc],n+1);
+ if (mpn_sub_n(Ap[inc], tp, Ap[inc],n+1))
+#endif
+ Ap[inc][n] = mpn_add_1(Ap[inc], Ap[inc], n, 1);
+ }
+ else {
+ int j, inc2=2*inc;
+ int *lk = *ll;
+ mp_limb_t *tmp;
+ TMP_DECL(marker);
+
+ TMP_MARK(marker);
+ tmp = TMP_ALLOC_LIMBS (n+1);
+ mpn_fft_fft_sqr(Ap, K/2,ll-1,2*omega,n,inc2, tp);
+ mpn_fft_fft_sqr(Ap+inc, K/2,ll-1,2*omega,n,inc2, tp);
+ /* A[2*j*inc] <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+ A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+ for (j=0;j<K/2;j++,lk+=2,Ap+=2*inc) {
+ MPN_COPY(tp, Ap[inc], n+1);
+ mpn_fft_mul_2exp_modF(Ap[inc], lk[1]*omega, n, tmp);
+ mpn_fft_add_modF(Ap[inc], Ap[0], n);
+ mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp);
+ mpn_fft_add_modF(Ap[0], tp, n);
+ }
+ TMP_FREE(marker);
+ }
+}
+
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+ N=n*BITS_PER_MP_LIMB
+ 2^omega is a primitive root mod 2^N+1
+ output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1 */
+
+static void
+#if __STDC__
+mpn_fft_fft (mp_limb_t **Ap, mp_limb_t **Bp, mp_size_t K, int **ll,
+ mp_size_t omega, mp_size_t n, mp_size_t inc, mp_limb_t *tp)
+#else
+mpn_fft_fft(Ap,Bp,K,ll,omega,n,inc,tp)
+ mp_limb_t **Ap,**Bp,*tp;
+ mp_size_t K,omega,n,inc;
+ int **ll;
+#endif
+{
+ if (K==2) {
+#ifdef ADDSUB
+ if (mpn_addsub_n(Ap[0], Ap[inc], Ap[0], Ap[inc], n+1) & 1)
+#else
+ MPN_COPY(tp, Ap[0], n+1);
+ mpn_add_n(Ap[0], Ap[0], Ap[inc],n+1);
+ if (mpn_sub_n(Ap[inc], tp, Ap[inc],n+1))
+#endif
+ Ap[inc][n] = mpn_add_1(Ap[inc], Ap[inc], n, 1);
+#ifdef ADDSUB
+ if (mpn_addsub_n(Bp[0], Bp[inc], Bp[0], Bp[inc], n+1) & 1)
+#else
+ MPN_COPY(tp, Bp[0], n+1);
+ mpn_add_n(Bp[0], Bp[0], Bp[inc],n+1);
+ if (mpn_sub_n(Bp[inc], tp, Bp[inc],n+1))
+#endif
+ Bp[inc][n] = mpn_add_1(Bp[inc], Bp[inc], n, 1);
+ }
+ else {
+ int j, inc2=2*inc;
+ int *lk=*ll;
+ mp_limb_t *tmp;
+ TMP_DECL(marker);
+
+ TMP_MARK(marker);
+ tmp = TMP_ALLOC_LIMBS (n+1);
+ mpn_fft_fft(Ap, Bp, K/2,ll-1,2*omega,n,inc2, tp);
+ mpn_fft_fft(Ap+inc, Bp+inc, K/2,ll-1,2*omega,n,inc2, tp);
+ /* A[2*j*inc] <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+ A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+ for (j=0;j<K/2;j++,lk+=2,Ap+=2*inc,Bp+=2*inc) {
+ MPN_COPY(tp, Ap[inc], n+1);
+ mpn_fft_mul_2exp_modF(Ap[inc], lk[1]*omega, n, tmp);
+ mpn_fft_add_modF(Ap[inc], Ap[0], n);
+ mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp);
+ mpn_fft_add_modF(Ap[0], tp, n);
+ MPN_COPY(tp, Bp[inc], n+1);
+ mpn_fft_mul_2exp_modF(Bp[inc], lk[1]*omega, n, tmp);
+ mpn_fft_add_modF(Bp[inc], Bp[0], n);
+ mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp);
+ mpn_fft_add_modF(Bp[0], tp, n);
+ }
+ TMP_FREE(marker);
+ }
+}
+
+
+/* a[i] <- a[i]*b[i] mod 2^(n*BITS_PER_MP_LIMB)+1 for 0 <= i < K */
+static void
+#if __STDC__
+mpn_fft_mul_modF_K (mp_limb_t **ap, mp_limb_t **bp, mp_size_t n, int K)
+#else
+mpn_fft_mul_modF_K(ap, bp, n, K)
+ mp_limb_t **ap, **bp;
+ mp_size_t n;
+ int K;
+#endif
+{
+ int i;
+ int sqr = (ap == bp);
+ TMP_DECL(marker);
+
+ TMP_MARK(marker);
+
+ if (n >= (sqr ? FFT_MODF_SQR_THRESHOLD : FFT_MODF_MUL_THRESHOLD)) {
+ int k, K2,nprime2,Nprime2,M2,maxLK,l,Mp2;
+ int **_fft_l;
+ mp_limb_t **Ap,**Bp,*A,*B,*T;
+
+ k = mpn_fft_best_k (n, sqr);
+ K2 = 1<<k;
+ maxLK = (K2>BITS_PER_MP_LIMB) ? K2 : BITS_PER_MP_LIMB;
+ M2 = n*BITS_PER_MP_LIMB/K2;
+ l = n/K2;
+ Nprime2 = ((2*M2+k+2+maxLK)/maxLK)*maxLK; /* ceil((2*M2+k+3)/maxLK)*maxLK*/
+ nprime2 = Nprime2/BITS_PER_MP_LIMB;
+ Mp2 = Nprime2/K2;
+
+ Ap = TMP_ALLOC_MP_PTRS (K2);
+ Bp = TMP_ALLOC_MP_PTRS (K2);
+ A = TMP_ALLOC_LIMBS (2*K2*(nprime2+1));
+ T = TMP_ALLOC_LIMBS (nprime2+1);
+ B = A + K2*(nprime2+1);
+ _fft_l = TMP_ALLOC_TYPE (k+1, int*);
+ for (i=0;i<=k;i++)
+ _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+ mpn_fft_initl(_fft_l, k);
+
+ TRACE (printf("recurse: %dx%d limbs -> %d times %dx%d (%1.2f)\n", n,
+ n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
+
+ for (i=0;i<K;i++,ap++,bp++)
+ mpn_mul_fft_internal(*ap, *ap, *bp, n, k, K2, Ap, Bp, A, B, nprime2,
+ l, Mp2, _fft_l, T, 1);
+ }
+ else {
+ mp_limb_t *a, *b, cc, *tp, *tpn; int n2=2*n;
+ tp = TMP_ALLOC_LIMBS (n2);
+ tpn = tp+n;
+ TRACE (printf (" mpn_mul_n %d of %d limbs\n", K, n));
+ for (i=0;i<K;i++) {
+ a = *ap++; b=*bp++;
+ if (sqr)
+ mpn_sqr_n(tp, a, n);
+ else
+ mpn_mul_n(tp, b, a, n);
+ if (a[n]) cc=mpn_add_n(tpn, tpn, b, n); else cc=0;
+ if (b[n]) cc += mpn_add_n(tpn, tpn, a, n) + a[n];
+ if (cc) {
+ cc = mpn_add_1(tp, tp, n2, cc);
+ ASSERT_NOCARRY (mpn_add_1(tp, tp, n2, cc));
+ }
+ a[n] = mpn_sub_n(a, tp, tpn, n) && mpn_add_1(a, a, n, 1);
+ }
+ }
+ TMP_FREE(marker);
+}
+
+
+/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]]
+ output: K*A[0] K*A[K-1] ... K*A[1] */
+
+static void
+#if __STDC__
+mpn_fft_fftinv (mp_limb_t **Ap, int K, mp_size_t omega, mp_size_t n,
+ mp_limb_t *tp)
+#else
+mpn_fft_fftinv(Ap,K,omega,n,tp)
+ mp_limb_t **Ap, *tp;
+ int K;
+ mp_size_t omega, n;
+#endif
+{
+ if (K==2) {
+#ifdef ADDSUB
+ if (mpn_addsub_n(Ap[0], Ap[1], Ap[0], Ap[1], n+1) & 1)
+#else
+ MPN_COPY(tp, Ap[0], n+1);
+ mpn_add_n(Ap[0], Ap[0], Ap[1], n+1);
+ if (mpn_sub_n(Ap[1], tp, Ap[1], n+1))
+#endif
+ Ap[1][n] = mpn_add_1(Ap[1], Ap[1], n, 1);
+ }
+ else {
+ int j, K2=K/2; mp_limb_t **Bp=Ap+K2, *tmp;
+ TMP_DECL(marker);
+
+ TMP_MARK(marker);
+ tmp = TMP_ALLOC_LIMBS (n+1);
+ mpn_fft_fftinv(Ap, K2, 2*omega, n, tp);
+ mpn_fft_fftinv(Bp, K2, 2*omega, n, tp);
+ /* A[j] <- A[j] + omega^j A[j+K/2]
+ A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */
+ for (j=0;j<K2;j++,Ap++,Bp++) {
+ MPN_COPY(tp, Bp[0], n+1);
+ mpn_fft_mul_2exp_modF(Bp[0], (j+K2)*omega, n, tmp);
+ mpn_fft_add_modF(Bp[0], Ap[0], n);
+ mpn_fft_mul_2exp_modF(tp, j*omega, n, tmp);
+ mpn_fft_add_modF(Ap[0], tp, n);
+ }
+ TMP_FREE(marker);
+ }
+}
+
+
+/* A <- A/2^k mod 2^(n*BITS_PER_MP_LIMB)+1 */
+static void
+#if __STDC__
+mpn_fft_div_2exp_modF (mp_limb_t *ap, int k, mp_size_t n, mp_limb_t *tp)
+#else
+mpn_fft_div_2exp_modF(ap,k,n,tp)
+ mp_limb_t *ap,*tp;
+ int k;
+ mp_size_t n;
+#endif
+{
+ int i;
+
+ i = 2*n*BITS_PER_MP_LIMB;
+ i = (i-k) % i;
+ mpn_fft_mul_2exp_modF(ap,i,n,tp);
+ /* 1/2^k = 2^(2nL-k) mod 2^(n*BITS_PER_MP_LIMB)+1 */
+ /* normalize so that A < 2^(n*BITS_PER_MP_LIMB)+1 */
+ if (ap[n]==1) {
+ for (i=0;i<n && ap[i]==0;i++);
+ if (i<n) {
+ ap[n]=0;
+ mpn_sub_1(ap, ap, n, 1);
+ }
+ }
+}
+
+
+/* R <- A mod 2^(n*BITS_PER_MP_LIMB)+1, n<=an<=3*n */
+static void
+#if __STDC__
+mpn_fft_norm_modF(mp_limb_t *rp, mp_limb_t *ap, mp_size_t n, mp_size_t an)
+#else
+mpn_fft_norm_modF(rp, ap, n, an)
+ mp_limb_t *rp;
+ mp_limb_t *ap;
+ mp_size_t n;
+ mp_size_t an;
+#endif
+{
+ mp_size_t l;
+
+ if (an>2*n) {
+ l = n;
+ rp[n] = mpn_add_1(rp+an-2*n, ap+an-2*n, 3*n-an,
+ mpn_add_n(rp,ap,ap+2*n,an-2*n));
+ }
+ else {
+ l = an-n;
+ MPN_COPY(rp, ap, n);
+ rp[n]=0;
+ }
+ if (mpn_sub_n(rp,rp,ap+n,l)) {
+ if (mpn_sub_1(rp+l,rp+l,n+1-l,1))
+ rp[n]=mpn_add_1(rp,rp,n,1);
+ }
+}
+
+
+static void
+#if __STDC__
+mpn_mul_fft_internal(mp_limb_t *op, mp_srcptr n, mp_srcptr m, mp_size_t pl,
+ int k, int K,
+ mp_limb_t **Ap, mp_limb_t **Bp,
+ mp_limb_t *A, mp_limb_t *B,
+ mp_size_t nprime, mp_size_t l, mp_size_t Mp,
+ int **_fft_l,
+ mp_limb_t *T, int rec)
+#else
+mpn_mul_fft_internal(op,n,m,pl,k,K,Ap,Bp,A,B,nprime,l,Mp,_fft_l,T,rec)
+ mp_limb_t *op;
+ mp_srcptr n, m;
+ mp_limb_t **Ap,**Bp,*A,*B,*T;
+ mp_size_t pl,nprime;
+ int **_fft_l;
+ int k,K,l,Mp,rec;
+#endif
+{
+ int i, sqr, pla, lo, sh, j;
+ mp_limb_t *p;
+
+ sqr = (n==m);
+
+ TRACE (printf ("pl=%d k=%d K=%d np=%d l=%d Mp=%d rec=%d sqr=%d\n",
+ pl,k,K,nprime,l,Mp,rec,sqr));
+
+ /* decomposition of inputs into arrays Ap[i] and Bp[i] */
+ if (rec) for (i=0;i<K;i++) {
+ Ap[i] = A+i*(nprime+1); Bp[i] = B+i*(nprime+1);
+ /* store the next M bits of n into A[i] */
+ /* supposes that M is a multiple of BITS_PER_MP_LIMB */
+ MPN_COPY(Ap[i], n, l); n+=l; MPN_ZERO(Ap[i]+l, nprime+1-l);
+ /* set most significant bits of n and m (important in recursive calls) */
+ if (i==K-1) Ap[i][l]=n[0];
+ mpn_fft_mul_2exp_modF(Ap[i], i*Mp, nprime, T);
+ if (!sqr) {
+ MPN_COPY(Bp[i], m, l); m+=l; MPN_ZERO(Bp[i]+l, nprime+1-l);
+ if (i==K-1) Bp[i][l]=m[0];
+ mpn_fft_mul_2exp_modF(Bp[i], i*Mp, nprime, T);
+ }
+ }
+
+ /* direct fft's */
+ if (sqr) mpn_fft_fft_sqr(Ap,K,_fft_l+k,2*Mp,nprime,1, T);
+ else mpn_fft_fft(Ap,Bp,K,_fft_l+k,2*Mp,nprime,1, T);
+
+ /* term to term multiplications */
+ mpn_fft_mul_modF_K(Ap, (sqr) ? Ap : Bp, nprime, K);
+
+ /* inverse fft's */
+ mpn_fft_fftinv(Ap, K, 2*Mp, nprime, T);
+
+ /* division of terms after inverse fft */
+ for (i=0;i<K;i++) mpn_fft_div_2exp_modF(Ap[i],k+((K-i)%K)*Mp,nprime, T);
+
+ /* addition of terms in result p */
+ MPN_ZERO(T,nprime+1);
+ pla = l*(K-1)+nprime+1; /* number of required limbs for p */
+ p = B; /* B has K*(n'+1) limbs, which is >= pla, i.e. enough */
+ MPN_ZERO(p, pla);
+ sqr=0; /* will accumulate the (signed) carry at p[pla] */
+ for (i=K-1,lo=l*i+nprime,sh=l*i;i>=0;i--,lo-=l,sh-=l) {
+ mp_ptr n = p+sh;
+ j = (K-i)%K;
+ if (mpn_add_n(n,n,Ap[j],nprime+1))
+ sqr += mpn_add_1(n+nprime+1,n+nprime+1,pla-sh-nprime-1,1);
+ T[2*l]=i+1; /* T = (i+1)*2^(2*M) */
+ if (mpn_cmp(Ap[j],T,nprime+1)>0) { /* subtract 2^N'+1 */
+ sqr -= mpn_sub_1(n,n,pla-sh,1);
+ sqr -= mpn_sub_1(p+lo,p+lo,pla-lo,1);
+ }
+ }
+ if (sqr==-1) {
+ if ((sqr=mpn_add_1(p+pla-pl,p+pla-pl,pl,1))) {
+ /* p[pla-pl]...p[pla-1] are all zero */
+ mpn_sub_1(p+pla-pl-1,p+pla-pl-1,pl+1,1);
+ mpn_sub_1(p+pla-1,p+pla-1,1,1);
+ }
+ }
+ else if (sqr==1) {
+ if (pla>=2*pl)
+ while ((sqr=mpn_add_1(p+pla-2*pl,p+pla-2*pl,2*pl,sqr)));
+ else {
+ sqr = mpn_sub_1(p+pla-pl,p+pla-pl,pl,sqr);
+ ASSERT (sqr == 0);
+ }
+ }
+ else
+ ASSERT (sqr == 0);
+
+ /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
+ < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
+ < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
+ mpn_fft_norm_modF(op,p,pl,pla);
+}
+
+
+/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*BITS_PER_MP_LIMB
+ n and m have respectively nl and ml limbs
+ op must have space for pl+1 limbs
+ One must have pl = mpn_fft_next_size(pl, k).
+*/
+
+void
+#if __STDC__
+mpn_mul_fft (mp_ptr op, mp_size_t pl,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml,
+ int k)
+#else
+mpn_mul_fft (op, pl, n, nl, m, ml, k)
+ mp_ptr op;
+ mp_size_t pl;
+ mp_srcptr n;
+ mp_size_t nl;
+ mp_srcptr m;
+ mp_size_t ml;
+ int k;
+#endif
+{
+ int K,maxLK,i,j;
+ mp_size_t N,Nprime,nprime,M,Mp,l;
+ mp_limb_t **Ap,**Bp,*A,*T,*B;
+ int **_fft_l;
+ int sqr = (n==m && nl==ml);
+ TMP_DECL(marker);
+
+ TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n",
+ pl, nl, ml, k));
+ ASSERT_ALWAYS (mpn_fft_next_size(pl, k) == pl);
+
+ TMP_MARK(marker);
+ N = pl*BITS_PER_MP_LIMB;
+ _fft_l = TMP_ALLOC_TYPE (k+1, int*);
+ for (i=0;i<=k;i++)
+ _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+ mpn_fft_initl(_fft_l, k);
+ K = 1<<k;
+ M = N/K; /* N = 2^k M */
+ l = M/BITS_PER_MP_LIMB;
+ maxLK = (K>BITS_PER_MP_LIMB) ? K : BITS_PER_MP_LIMB;
+
+ Nprime = ((2*M+k+2+maxLK)/maxLK)*maxLK; /* ceil((2*M+k+3)/maxLK)*maxLK; */
+ nprime = Nprime/BITS_PER_MP_LIMB;
+ TRACE (printf ("N=%d K=%d, M=%d, l=%d, maxLK=%d, Np=%d, np=%d\n",
+ N, K, M, l, maxLK, Nprime, nprime));
+ if (nprime >= (sqr ? FFT_MODF_SQR_THRESHOLD : FFT_MODF_MUL_THRESHOLD)) {
+ maxLK = (1<<mpn_fft_best_k(nprime,n==m))*BITS_PER_MP_LIMB;
+ if (Nprime % maxLK) {
+ Nprime=((Nprime/maxLK)+1)*maxLK;
+ nprime = Nprime/BITS_PER_MP_LIMB;
+ }
+ TRACE (printf ("new maxLK=%d, Np=%d, np=%d\n", maxLK, Nprime, nprime));
+ }
+
+ T = TMP_ALLOC_LIMBS (nprime+1);
+ Mp = Nprime/K;
+
+ TRACE (printf("%dx%d limbs -> %d times %dx%d limbs (%1.2f)\n",
+ pl,pl,K,nprime,nprime,2.0*(double)N/Nprime/K);
+ printf(" temp space %ld\n", 2*K*(nprime+1)));
+
+ A = _MP_ALLOCATE_FUNC_LIMBS (2*K*(nprime+1));
+ B = A+K*(nprime+1);
+ Ap = TMP_ALLOC_MP_PTRS (K);
+ Bp = TMP_ALLOC_MP_PTRS (K);
+ /* special decomposition for main call */
+ for (i=0;i<K;i++) {
+ Ap[i] = A+i*(nprime+1); Bp[i] = B+i*(nprime+1);
+ /* store the next M bits of n into A[i] */
+ /* supposes that M is a multiple of BITS_PER_MP_LIMB */
+ if (nl>0) {
+ j = (nl>=l) ? l : nl; /* limbs to store in Ap[i] */
+ MPN_COPY(Ap[i], n, j); n+=l; MPN_ZERO(Ap[i]+j, nprime+1-j);
+ mpn_fft_mul_2exp_modF(Ap[i], i*Mp, nprime, T);
+ }
+ else MPN_ZERO(Ap[i], nprime+1);
+ nl -= l;
+ if (n!=m) {
+ if (ml>0) {
+ j = (ml>=l) ? l : ml; /* limbs to store in Bp[i] */
+ MPN_COPY(Bp[i], m, j); m+=l; MPN_ZERO(Bp[i]+j, nprime+1-j);
+ mpn_fft_mul_2exp_modF(Bp[i], i*Mp, nprime, T);
+ }
+ else MPN_ZERO(Bp[i], nprime+1);
+ }
+ ml -= l;
+ }
+ mpn_mul_fft_internal(op,n,m,pl,k,K,Ap,Bp,A,B,nprime,l,Mp,_fft_l,T,0);
+ TMP_FREE(marker);
+ _MP_FREE_FUNC_LIMBS (A, 2*K*(nprime+1));
+}
+
+
+#if WANT_ASSERT
+static int
+#if __STDC__
+mpn_zero_p (mp_ptr p, mp_size_t n)
+#else
+ mpn_zero_p (p, n)
+ mp_ptr p;
+ mp_size_t n;
+#endif
+{
+ mp_size_t i;
+
+ for (i = 0; i < n; i++)
+ {
+ if (p[i] != 0)
+ return 0;
+ }
+
+ return 1;
+}
+#endif
+
+
+/* Multiply {n,nl}*{m,ml} and write the result to {op,nl+ml}.
+
+ FIXME: Duplicating the result like this is wasteful, do something better
+ perhaps at the norm_modF stage above. */
+
+void
+#if __STDC__
+mpn_mul_fft_full (mp_ptr op,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml)
+#else
+mpn_mul_fft_full (op, n, nl, m, ml)
+ mp_ptr op;
+ mp_srcptr n;
+ mp_size_t nl;
+ mp_srcptr m;
+ mp_size_t ml;
+#endif
+{
+ mp_ptr pad_op;
+ mp_size_t pl;
+ int k;
+ int sqr = (n==m && nl==ml);
+
+ k = mpn_fft_best_k (nl+ml, sqr);
+ pl = mpn_fft_next_size (nl+ml, k);
+
+ TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl=%ld k=%d\n",
+ nl, ml, pl, k));
+
+ pad_op = _MP_ALLOCATE_FUNC_LIMBS (pl+1);
+ mpn_mul_fft (pad_op, pl, n, nl, m, ml, k);
+
+ ASSERT (mpn_zero_p (pad_op+nl+ml, pl+1-(nl+ml)));
+ MPN_COPY (op, pad_op, nl+ml);
+
+ _MP_FREE_FUNC_LIMBS (pad_op, pl+1);
+}
diff --git a/rts/gmp/mpn/generic/mul_n.c b/rts/gmp/mpn/generic/mul_n.c
new file mode 100644
index 0000000000..b7563be2d3
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_n.c
@@ -0,0 +1,1343 @@
+/* mpn_mul_n and helper function -- Multiply/square natural numbers.
+
+ THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n)
+ ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH
+ THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
+ 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
+#define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1)
+
+#if !defined (__alpha) && !defined (__mips)
+/* For all other machines, we want to call mpn functions for the compund
+ operations instead of open-coding them. */
+#define USE_MORE_MPN
+#endif
+
+/*== Function declarations =================================================*/
+
+static void evaluate3 _PROTO ((mp_ptr, mp_ptr, mp_ptr,
+ mp_ptr, mp_ptr, mp_ptr,
+ mp_srcptr, mp_srcptr, mp_srcptr,
+ mp_size_t, mp_size_t));
+static void interpolate3 _PROTO ((mp_srcptr,
+ mp_ptr, mp_ptr, mp_ptr,
+ mp_srcptr,
+ mp_ptr, mp_ptr, mp_ptr,
+ mp_size_t, mp_size_t));
+static mp_limb_t add2Times _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+
+/*-- mpn_kara_mul_n ---------------------------------------------------------------*/
+
+/* Multiplies using 3 half-sized mults and so on recursively.
+ * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
+ * No overlap of p[...] with a[...] or b[...].
+ * ws is workspace.
+ */
+
+void
+#if __STDC__
+mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
+#else
+mpn_kara_mul_n(p, a, b, n, ws)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_srcptr b;
+ mp_size_t n;
+ mp_ptr ws;
+#endif
+{
+ mp_limb_t i, sign, w, w0, w1;
+ mp_size_t n2;
+ mp_srcptr x, y;
+
+ n2 = n >> 1;
+ ASSERT (n2 > 0);
+
+ if (n & 1)
+ {
+ /* Odd length. */
+ mp_size_t n1, n3, nm1;
+
+ n3 = n - n2;
+
+ sign = 0;
+ w = a[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p, a, a + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n3+i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n3;
+ y = a;
+ sign = 1;
+ }
+ else
+ {
+ x = a;
+ y = a + n3;
+ }
+ mpn_sub_n (p, x, y, n2);
+ }
+ p[n2] = w;
+
+ w = b[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p + n3, b, b + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = b[i];
+ w1 = b[n3+i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = b + n3;
+ y = b;
+ sign ^= 1;
+ }
+ else
+ {
+ x = b;
+ y = b + n3;
+ }
+ mpn_sub_n (p + n3, x, y, n2);
+ }
+ p[n] = w;
+
+ n1 = n + 1;
+ if (n2 < KARATSUBA_MUL_THRESHOLD)
+ {
+ if (n3 < KARATSUBA_MUL_THRESHOLD)
+ {
+ mpn_mul_basecase (ws, p, n3, p + n3, n3);
+ mpn_mul_basecase (p, a, n3, b, n3);
+ }
+ else
+ {
+ mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+ mpn_kara_mul_n (p, a, b, n3, ws + n1);
+ }
+ mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
+ }
+ else
+ {
+ mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+ mpn_kara_mul_n (p, a, b, n3, ws + n1);
+ mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
+ }
+
+ if (sign)
+ mpn_add_n (ws, p, ws, n1);
+ else
+ mpn_sub_n (ws, p, ws, n1);
+
+ nm1 = n - 1;
+ if (mpn_add_n (ws, p + n1, ws, nm1))
+ {
+ mp_limb_t x = ws[nm1] + 1;
+ ws[nm1] = x;
+ if (x == 0)
+ ++ws[n];
+ }
+ if (mpn_add_n (p + n3, p + n3, ws, n1))
+ {
+ mp_limb_t x;
+ i = n1 + n3;
+ do
+ {
+ x = p[i] + 1;
+ p[i] = x;
+ ++i;
+ } while (x == 0);
+ }
+ }
+ else
+ {
+ /* Even length. */
+ mp_limb_t t;
+
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n2+i];
+ }
+ while (w0 == w1 && i != 0);
+ sign = 0;
+ if (w0 < w1)
+ {
+ x = a + n2;
+ y = a;
+ sign = 1;
+ }
+ else
+ {
+ x = a;
+ y = a + n2;
+ }
+ mpn_sub_n (p, x, y, n2);
+
+ i = n2;
+ do
+ {
+ --i;
+ w0 = b[i];
+ w1 = b[n2+i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = b + n2;
+ y = b;
+ sign ^= 1;
+ }
+ else
+ {
+ x = b;
+ y = b + n2;
+ }
+ mpn_sub_n (p + n2, x, y, n2);
+
+ /* Pointwise products. */
+ if (n2 < KARATSUBA_MUL_THRESHOLD)
+ {
+ mpn_mul_basecase (ws, p, n2, p + n2, n2);
+ mpn_mul_basecase (p, a, n2, b, n2);
+ mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
+ }
+ else
+ {
+ mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
+ mpn_kara_mul_n (p, a, b, n2, ws + n);
+ mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
+ }
+
+ /* Interpolate. */
+ if (sign)
+ w = mpn_add_n (ws, p, ws, n);
+ else
+ w = -mpn_sub_n (ws, p, ws, n);
+ w += mpn_add_n (ws, p + n, ws, n);
+ w += mpn_add_n (p + n2, p + n2, ws, n);
+ /* TO DO: could put "if (w) { ... }" here.
+ * Less work but badly predicted branch.
+ * No measurable difference in speed on Alpha.
+ */
+ i = n + n2;
+ t = p[i] + w;
+ p[i] = t;
+ if (t < w)
+ {
+ do
+ {
+ ++i;
+ w = p[i] + 1;
+ p[i] = w;
+ }
+ while (w == 0);
+ }
+ }
+}
+
+void
+#if __STDC__
+mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
+#else
+mpn_kara_sqr_n (p, a, n, ws)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_size_t n;
+ mp_ptr ws;
+#endif
+{
+ mp_limb_t i, sign, w, w0, w1;
+ mp_size_t n2;
+ mp_srcptr x, y;
+
+ n2 = n >> 1;
+ ASSERT (n2 > 0);
+
+ if (n & 1)
+ {
+ /* Odd length. */
+ mp_size_t n1, n3, nm1;
+
+ n3 = n - n2;
+
+ sign = 0;
+ w = a[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p, a, a + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n3+i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n3;
+ y = a;
+ sign = 1;
+ }
+ else
+ {
+ x = a;
+ y = a + n3;
+ }
+ mpn_sub_n (p, x, y, n2);
+ }
+ p[n2] = w;
+
+ w = a[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p + n3, a, a + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n3+i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n3;
+ y = a;
+ sign ^= 1;
+ }
+ else
+ {
+ x = a;
+ y = a + n3;
+ }
+ mpn_sub_n (p + n3, x, y, n2);
+ }
+ p[n] = w;
+
+ n1 = n + 1;
+ if (n2 < KARATSUBA_SQR_THRESHOLD)
+ {
+ if (n3 < KARATSUBA_SQR_THRESHOLD)
+ {
+ mpn_sqr_basecase (ws, p, n3);
+ mpn_sqr_basecase (p, a, n3);
+ }
+ else
+ {
+ mpn_kara_sqr_n (ws, p, n3, ws + n1);
+ mpn_kara_sqr_n (p, a, n3, ws + n1);
+ }
+ mpn_sqr_basecase (p + n1, a + n3, n2);
+ }
+ else
+ {
+ mpn_kara_sqr_n (ws, p, n3, ws + n1);
+ mpn_kara_sqr_n (p, a, n3, ws + n1);
+ mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1);
+ }
+
+ if (sign)
+ mpn_add_n (ws, p, ws, n1);
+ else
+ mpn_sub_n (ws, p, ws, n1);
+
+ nm1 = n - 1;
+ if (mpn_add_n (ws, p + n1, ws, nm1))
+ {
+ mp_limb_t x = ws[nm1] + 1;
+ ws[nm1] = x;
+ if (x == 0)
+ ++ws[n];
+ }
+ if (mpn_add_n (p + n3, p + n3, ws, n1))
+ {
+ mp_limb_t x;
+ i = n1 + n3;
+ do
+ {
+ x = p[i] + 1;
+ p[i] = x;
+ ++i;
+ } while (x == 0);
+ }
+ }
+ else
+ {
+ /* Even length. */
+ mp_limb_t t;
+
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n2+i];
+ }
+ while (w0 == w1 && i != 0);
+ sign = 0;
+ if (w0 < w1)
+ {
+ x = a + n2;
+ y = a;
+ sign = 1;
+ }
+ else
+ {
+ x = a;
+ y = a + n2;
+ }
+ mpn_sub_n (p, x, y, n2);
+
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n2+i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n2;
+ y = a;
+ sign ^= 1;
+ }
+ else
+ {
+ x = a;
+ y = a + n2;
+ }
+ mpn_sub_n (p + n2, x, y, n2);
+
+ /* Pointwise products. */
+ if (n2 < KARATSUBA_SQR_THRESHOLD)
+ {
+ mpn_sqr_basecase (ws, p, n2);
+ mpn_sqr_basecase (p, a, n2);
+ mpn_sqr_basecase (p + n, a + n2, n2);
+ }
+ else
+ {
+ mpn_kara_sqr_n (ws, p, n2, ws + n);
+ mpn_kara_sqr_n (p, a, n2, ws + n);
+ mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
+ }
+
+ /* Interpolate. */
+ if (sign)
+ w = mpn_add_n (ws, p, ws, n);
+ else
+ w = -mpn_sub_n (ws, p, ws, n);
+ w += mpn_add_n (ws, p + n, ws, n);
+ w += mpn_add_n (p + n2, p + n2, ws, n);
+ /* TO DO: could put "if (w) { ... }" here.
+ * Less work but badly predicted branch.
+ * No measurable difference in speed on Alpha.
+ */
+ i = n + n2;
+ t = p[i] + w;
+ p[i] = t;
+ if (t < w)
+ {
+ do
+ {
+ ++i;
+ w = p[i] + 1;
+ p[i] = w;
+ }
+ while (w == 0);
+ }
+ }
+}
+
+/*-- add2Times -------------------------------------------------------------*/
+
+/* z[] = x[] + 2 * y[]
+ Note that z and x might point to the same vectors. */
+#ifdef USE_MORE_MPN
+static inline mp_limb_t
+#if __STDC__
+add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
+#else
+add2Times (z, x, y, n)
+ mp_ptr z;
+ mp_srcptr x;
+ mp_srcptr y;
+ mp_size_t n;
+#endif
+{
+ mp_ptr t;
+ mp_limb_t c;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ c = mpn_lshift (t, y, n, 1);
+ c += mpn_add_n (z, x, t, n);
+ TMP_FREE (marker);
+ return c;
+}
+#else
+
+static mp_limb_t
+#if __STDC__
+add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
+#else
+add2Times (z, x, y, n)
+ mp_ptr z;
+ mp_srcptr x;
+ mp_srcptr y;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t c, v, w;
+
+ ASSERT (n > 0);
+ v = *x; w = *y;
+ c = w >> (BITS_PER_MP_LIMB - 1);
+ w <<= 1;
+ v += w;
+ c += v < w;
+ *z = v;
+ ++x; ++y; ++z;
+ while (--n)
+ {
+ v = *x;
+ w = *y;
+ v += c;
+ c = v < c;
+ c += w >> (BITS_PER_MP_LIMB - 1);
+ w <<= 1;
+ v += w;
+ c += v < w;
+ *z = v;
+ ++x; ++y; ++z;
+ }
+
+ return c;
+}
+#endif
+
+/*-- evaluate3 -------------------------------------------------------------*/
+
+/* Evaluates:
+ * ph := 4*A+2*B+C
+ * p1 := A+B+C
+ * p2 := A+2*B+4*C
+ * where:
+ * ph[], p1[], p2[], A[] and B[] all have length len,
+ * C[] has length len2 with len-len2 = 0, 1 or 2.
+ * Returns top words (overflow) at pth, pt1 and pt2 respectively.
+ */
+#ifdef USE_MORE_MPN
+static void
+#if __STDC__
+evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
+ mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len, mp_size_t len2)
+#else
+evaluate3 (ph, p1, p2, pth, pt1, pt2,
+ A, B, C, len, len2)
+ mp_ptr ph;
+ mp_ptr p1;
+ mp_ptr p2;
+ mp_ptr pth;
+ mp_ptr pt1;
+ mp_ptr pt2;
+ mp_srcptr A;
+ mp_srcptr B;
+ mp_srcptr C;
+ mp_size_t len;
+ mp_size_t len2;
+#endif
+{
+ mp_limb_t c, d, e;
+
+ ASSERT (len - len2 <= 2);
+
+ e = mpn_lshift (p1, B, len, 1);
+
+ c = mpn_lshift (ph, A, len, 2);
+ c += e + mpn_add_n (ph, ph, p1, len);
+ d = mpn_add_n (ph, ph, C, len2);
+ if (len2 == len) c += d; else c += mpn_add_1 (ph + len2, ph + len2, len-len2, d);
+ ASSERT (c < 7);
+ *pth = c;
+
+ c = mpn_lshift (p2, C, len2, 2);
+#if 1
+ if (len2 != len) { p2[len-1] = 0; p2[len2] = c; c = 0; }
+ c += e + mpn_add_n (p2, p2, p1, len);
+#else
+ d = mpn_add_n (p2, p2, p1, len2);
+ c += d;
+ if (len2 != len) c = mpn_add_1 (p2+len2, p1+len2, len-len2, c);
+ c += e;
+#endif
+ c += mpn_add_n (p2, p2, A, len);
+ ASSERT (c < 7);
+ *pt2 = c;
+
+ c = mpn_add_n (p1, A, B, len);
+ d = mpn_add_n (p1, p1, C, len2);
+ if (len2 == len) c += d;
+ else c += mpn_add_1 (p1+len2, p1+len2, len-len2, d);
+ ASSERT (c < 3);
+ *pt1 = c;
+
+}
+
+#else
+
+static void
+#if __STDC__
+evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
+ mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls)
+#else
+evaluate3 (ph, p1, p2, pth, pt1, pt2,
+ A, B, C, l, ls)
+ mp_ptr ph;
+ mp_ptr p1;
+ mp_ptr p2;
+ mp_ptr pth;
+ mp_ptr pt1;
+ mp_ptr pt2;
+ mp_srcptr A;
+ mp_srcptr B;
+ mp_srcptr C;
+ mp_size_t l;
+ mp_size_t ls;
+#endif
+{
+ mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2;
+
+ ASSERT (l - ls <= 2);
+
+ th = t1 = t2 = 0;
+ for (i = 0; i < l; ++i)
+ {
+ a = *A;
+ b = *B;
+ c = i < ls ? *C : 0;
+
+ /* TO DO: choose one of the following alternatives. */
+#if 0
+ t = a << 2;
+ vh = th + t;
+ th = vh < t;
+ th += a >> (BITS_PER_MP_LIMB - 2);
+ t = b << 1;
+ vh += t;
+ th += vh < t;
+ th += b >> (BITS_PER_MP_LIMB - 1);
+ vh += c;
+ th += vh < c;
+#else
+ vh = th + c;
+ th = vh < c;
+ t = b << 1;
+ vh += t;
+ th += vh < t;
+ th += b >> (BITS_PER_MP_LIMB - 1);
+ t = a << 2;
+ vh += t;
+ th += vh < t;
+ th += a >> (BITS_PER_MP_LIMB - 2);
+#endif
+
+ v1 = t1 + a;
+ t1 = v1 < a;
+ v1 += b;
+ t1 += v1 < b;
+ v1 += c;
+ t1 += v1 < c;
+
+ v2 = t2 + a;
+ t2 = v2 < a;
+ t = b << 1;
+ v2 += t;
+ t2 += v2 < t;
+ t2 += b >> (BITS_PER_MP_LIMB - 1);
+ t = c << 2;
+ v2 += t;
+ t2 += v2 < t;
+ t2 += c >> (BITS_PER_MP_LIMB - 2);
+
+ *ph = vh;
+ *p1 = v1;
+ *p2 = v2;
+
+ ++A; ++B; ++C;
+ ++ph; ++p1; ++p2;
+ }
+
+ ASSERT (th < 7);
+ ASSERT (t1 < 3);
+ ASSERT (t2 < 7);
+
+ *pth = th;
+ *pt1 = t1;
+ *pt2 = t2;
+}
+#endif
+
+
+/*-- interpolate3 ----------------------------------------------------------*/
+
+/* Interpolates B, C, D (in-place) from:
+ * 16*A+8*B+4*C+2*D+E
+ * A+B+C+D+E
+ * A+2*B+4*C+8*D+16*E
+ * where:
+ * A[], B[], C[] and D[] all have length l,
+ * E[] has length ls with l-ls = 0, 2 or 4.
+ *
+ * Reads top words (from earlier overflow) from ptb, ptc and ptd,
+ * and returns new top words there.
+ */
+
+#ifdef USE_MORE_MPN
+static void
+#if __STDC__
+interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
+ mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t len, mp_size_t len2)
+#else
+interpolate3 (A, B, C, D, E,
+ ptb, ptc, ptd, len, len2)
+ mp_srcptr A;
+ mp_ptr B;
+ mp_ptr C;
+ mp_ptr D;
+ mp_srcptr E;
+ mp_ptr ptb;
+ mp_ptr ptc;
+ mp_ptr ptd;
+ mp_size_t len;
+ mp_size_t len2;
+#endif
+{
+ mp_ptr ws;
+ mp_limb_t t, tb,tc,td;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ ASSERT (len - len2 == 0 || len - len2 == 2 || len - len2 == 4);
+
+ /* Let x1, x2, x3 be the values to interpolate. We have:
+ * b = 16*a + 8*x1 + 4*x2 + 2*x3 + e
+ * c = a + x1 + x2 + x3 + e
+ * d = a + 2*x1 + 4*x2 + 8*x3 + 16*e
+ */
+
+ ws = (mp_ptr) TMP_ALLOC (len * BYTES_PER_MP_LIMB);
+
+ tb = *ptb; tc = *ptc; td = *ptd;
+
+
+ /* b := b - 16*a - e
+ * c := c - a - e
+ * d := d - a - 16*e
+ */
+
+ t = mpn_lshift (ws, A, len, 4);
+ tb -= t + mpn_sub_n (B, B, ws, len);
+ t = mpn_sub_n (B, B, E, len2);
+ if (len2 == len) tb -= t;
+ else tb -= mpn_sub_1 (B+len2, B+len2, len-len2, t);
+
+ tc -= mpn_sub_n (C, C, A, len);
+ t = mpn_sub_n (C, C, E, len2);
+ if (len2 == len) tc -= t;
+ else tc -= mpn_sub_1 (C+len2, C+len2, len-len2, t);
+
+ t = mpn_lshift (ws, E, len2, 4);
+ t += mpn_add_n (ws, ws, A, len2);
+#if 1
+ if (len2 != len) t = mpn_add_1 (ws+len2, A+len2, len-len2, t);
+ td -= t + mpn_sub_n (D, D, ws, len);
+#else
+ t += mpn_sub_n (D, D, ws, len2);
+ if (len2 != len) {
+ t = mpn_sub_1 (D+len2, D+len2, len-len2, t);
+ t += mpn_sub_n (D+len2, D+len2, A+len2, len-len2);
+ } /* end if/else */
+ td -= t;
+#endif
+
+
+ /* b, d := b + d, b - d */
+
+#ifdef HAVE_MPN_ADD_SUB_N
+ /* #error TO DO ... */
+#else
+ t = tb + td + mpn_add_n (ws, B, D, len);
+ td = tb - td - mpn_sub_n (D, B, D, len);
+ tb = t;
+ MPN_COPY (B, ws, len);
+#endif
+
+ /* b := b-8*c */
+ t = 8 * tc + mpn_lshift (ws, C, len, 3);
+ tb -= t + mpn_sub_n (B, B, ws, len);
+
+ /* c := 2*c - b */
+ tc = 2 * tc + mpn_lshift (C, C, len, 1);
+ tc -= tb + mpn_sub_n (C, C, B, len);
+
+ /* d := d/3 */
+ td = (td - mpn_divexact_by3 (D, D, len)) * INVERSE_3;
+
+ /* b, d := b + d, b - d */
+#ifdef HAVE_MPN_ADD_SUB_N
+ /* #error TO DO ... */
+#else
+ t = tb + td + mpn_add_n (ws, B, D, len);
+ td = tb - td - mpn_sub_n (D, B, D, len);
+ tb = t;
+ MPN_COPY (B, ws, len);
+#endif
+
+ /* Now:
+ * b = 4*x1
+ * c = 2*x2
+ * d = 4*x3
+ */
+
+ ASSERT(!(*B & 3));
+ mpn_rshift (B, B, len, 2);
+ B[len-1] |= tb<<(BITS_PER_MP_LIMB-2);
+ ASSERT((long)tb >= 0);
+ tb >>= 2;
+
+ ASSERT(!(*C & 1));
+ mpn_rshift (C, C, len, 1);
+ C[len-1] |= tc<<(BITS_PER_MP_LIMB-1);
+ ASSERT((long)tc >= 0);
+ tc >>= 1;
+
+ ASSERT(!(*D & 3));
+ mpn_rshift (D, D, len, 2);
+ D[len-1] |= td<<(BITS_PER_MP_LIMB-2);
+ ASSERT((long)td >= 0);
+ td >>= 2;
+
+#if WANT_ASSERT
+ ASSERT (tb < 2);
+ if (len == len2)
+ {
+ ASSERT (tc < 3);
+ ASSERT (td < 2);
+ }
+ else
+ {
+ ASSERT (tc < 2);
+ ASSERT (!td);
+ }
+#endif
+
+ *ptb = tb;
+ *ptc = tc;
+ *ptd = td;
+
+ TMP_FREE (marker);
+}
+
+#else
+
+static void
+#if __STDC__
+interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
+ mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t l, mp_size_t ls)
+#else
+interpolate3 (A, B, C, D, E,
+ ptb, ptc, ptd, l, ls)
+ mp_srcptr A;
+ mp_ptr B;
+ mp_ptr C;
+ mp_ptr D;
+ mp_srcptr E;
+ mp_ptr ptb;
+ mp_ptr ptc;
+ mp_ptr ptd;
+ mp_size_t l;
+ mp_size_t ls;
+#endif
+{
+ mp_limb_t a,b,c,d,e,t, i, sb,sc,sd, ob,oc,od;
+ const mp_limb_t maskOffHalf = (~(mp_limb_t) 0) << (BITS_PER_MP_LIMB >> 1);
+
+#if WANT_ASSERT
+ t = l - ls;
+ ASSERT (t == 0 || t == 2 || t == 4);
+#endif
+
+ sb = sc = sd = 0;
+ for (i = 0; i < l; ++i)
+ {
+ mp_limb_t tb, tc, td, tt;
+
+ a = *A;
+ b = *B;
+ c = *C;
+ d = *D;
+ e = i < ls ? *E : 0;
+
+ /* Let x1, x2, x3 be the values to interpolate. We have:
+ * b = 16*a + 8*x1 + 4*x2 + 2*x3 + e
+ * c = a + x1 + x2 + x3 + e
+ * d = a + 2*x1 + 4*x2 + 8*x3 + 16*e
+ */
+
+ /* b := b - 16*a - e
+ * c := c - a - e
+ * d := d - a - 16*e
+ */
+ t = a << 4;
+ tb = -(a >> (BITS_PER_MP_LIMB - 4)) - (b < t);
+ b -= t;
+ tb -= b < e;
+ b -= e;
+ tc = -(c < a);
+ c -= a;
+ tc -= c < e;
+ c -= e;
+ td = -(d < a);
+ d -= a;
+ t = e << 4;
+ td = td - (e >> (BITS_PER_MP_LIMB - 4)) - (d < t);
+ d -= t;
+
+ /* b, d := b + d, b - d */
+ t = b + d;
+ tt = tb + td + (t < b);
+ td = tb - td - (b < d);
+ d = b - d;
+ b = t;
+ tb = tt;
+
+ /* b := b-8*c */
+ t = c << 3;
+ tb = tb - (tc << 3) - (c >> (BITS_PER_MP_LIMB - 3)) - (b < t);
+ b -= t;
+
+ /* c := 2*c - b */
+ t = c << 1;
+ tc = (tc << 1) + (c >> (BITS_PER_MP_LIMB - 1)) - tb - (t < b);
+ c = t - b;
+
+ /* d := d/3 */
+ d *= INVERSE_3;
+ td = td - (d >> (BITS_PER_MP_LIMB - 1)) - (d*3 < d);
+ td *= INVERSE_3;
+
+ /* b, d := b + d, b - d */
+ t = b + d;
+ tt = tb + td + (t < b);
+ td = tb - td - (b < d);
+ d = b - d;
+ b = t;
+ tb = tt;
+
+ /* Now:
+ * b = 4*x1
+ * c = 2*x2
+ * d = 4*x3
+ */
+
+ /* sb has period 2. */
+ b += sb;
+ tb += b < sb;
+ sb &= maskOffHalf;
+ sb |= sb >> (BITS_PER_MP_LIMB >> 1);
+ sb += tb;
+
+ /* sc has period 1. */
+ c += sc;
+ tc += c < sc;
+ /* TO DO: choose one of the following alternatives. */
+#if 1
+ sc = (mp_limb_t)((long)sc >> (BITS_PER_MP_LIMB - 1));
+ sc += tc;
+#else
+ sc = tc - ((long)sc < 0L);
+#endif
+
+ /* sd has period 2. */
+ d += sd;
+ td += d < sd;
+ sd &= maskOffHalf;
+ sd |= sd >> (BITS_PER_MP_LIMB >> 1);
+ sd += td;
+
+ if (i != 0)
+ {
+ B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
+ C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
+ D[-1] = od | d << (BITS_PER_MP_LIMB - 2);
+ }
+ ob = b >> 2;
+ oc = c >> 1;
+ od = d >> 2;
+
+ ++A; ++B; ++C; ++D; ++E;
+ }
+
+ /* Handle top words. */
+ b = *ptb;
+ c = *ptc;
+ d = *ptd;
+
+ t = b + d;
+ d = b - d;
+ b = t;
+ b -= c << 3;
+ c = (c << 1) - b;
+ d *= INVERSE_3;
+ t = b + d;
+ d = b - d;
+ b = t;
+
+ b += sb;
+ c += sc;
+ d += sd;
+
+ B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
+ C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
+ D[-1] = od | d << (BITS_PER_MP_LIMB - 2);
+
+ b >>= 2;
+ c >>= 1;
+ d >>= 2;
+
+#if WANT_ASSERT
+ ASSERT (b < 2);
+ if (l == ls)
+ {
+ ASSERT (c < 3);
+ ASSERT (d < 2);
+ }
+ else
+ {
+ ASSERT (c < 2);
+ ASSERT (!d);
+ }
+#endif
+
+ *ptb = b;
+ *ptc = c;
+ *ptd = d;
+}
+#endif
+
+
+/*-- mpn_toom3_mul_n --------------------------------------------------------------*/
+
+/* Multiplies using 5 mults of one third size and so on recursively.
+ * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
+ * No overlap of p[...] with a[...] or b[...].
+ * ws is workspace.
+ */
+
+/* TO DO: If TOOM3_MUL_THRESHOLD is much bigger than KARATSUBA_MUL_THRESHOLD then the
+ * recursion in mpn_toom3_mul_n() will always bottom out with mpn_kara_mul_n()
+ * because the "n < KARATSUBA_MUL_THRESHOLD" test here will always be false.
+ */
+
+#define TOOM3_MUL_REC(p, a, b, n, ws) \
+ do { \
+ if (n < KARATSUBA_MUL_THRESHOLD) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else if (n < TOOM3_MUL_THRESHOLD) \
+ mpn_kara_mul_n (p, a, b, n, ws); \
+ else \
+ mpn_toom3_mul_n (p, a, b, n, ws); \
+ } while (0)
+
+void
+#if __STDC__
+mpn_toom3_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
+#else
+mpn_toom3_mul_n (p, a, b, n, ws)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_srcptr b;
+ mp_size_t n;
+ mp_ptr ws;
+#endif
+{
+ mp_limb_t cB,cC,cD, dB,dC,dD, tB,tC,tD;
+ mp_limb_t *A,*B,*C,*D,*E, *W;
+ mp_size_t l,l2,l3,l4,l5,ls;
+
+ /* Break n words into chunks of size l, l and ls.
+ * n = 3*k => l = k, ls = k
+ * n = 3*k+1 => l = k+1, ls = k-1
+ * n = 3*k+2 => l = k+1, ls = k
+ */
+ {
+ mp_limb_t m;
+
+ ASSERT (n >= TOOM3_MUL_THRESHOLD);
+ l = ls = n / 3;
+ m = n - l * 3;
+ if (m != 0)
+ ++l;
+ if (m == 1)
+ --ls;
+
+ l2 = l * 2;
+ l3 = l * 3;
+ l4 = l * 4;
+ l5 = l * 5;
+ A = p;
+ B = ws;
+ C = p + l2;
+ D = ws + l2;
+ E = p + l4;
+ W = ws + l4;
+ }
+
+ /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/
+ evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
+ evaluate3 (A + l, B + l, C + l, &dB, &dC, &dD, b, b + l, b + l2, l, ls);
+
+ /** Second stage: pointwise multiplies. **/
+ TOOM3_MUL_REC(D, C, C + l, l, W);
+ tD = cD*dD;
+ if (cD) tD += mpn_addmul_1 (D + l, C + l, l, cD);
+ if (dD) tD += mpn_addmul_1 (D + l, C, l, dD);
+ ASSERT (tD < 49);
+ TOOM3_MUL_REC(C, B, B + l, l, W);
+ tC = cC*dC;
+ /* TO DO: choose one of the following alternatives. */
+#if 0
+ if (cC) tC += mpn_addmul_1 (C + l, B + l, l, cC);
+ if (dC) tC += mpn_addmul_1 (C + l, B, l, dC);
+#else
+ if (cC)
+ {
+ if (cC == 1) tC += mpn_add_n (C + l, C + l, B + l, l);
+ else tC += add2Times (C + l, C + l, B + l, l);
+ }
+ if (dC)
+ {
+ if (dC == 1) tC += mpn_add_n (C + l, C + l, B, l);
+ else tC += add2Times (C + l, C + l, B, l);
+ }
+#endif
+ ASSERT (tC < 9);
+ TOOM3_MUL_REC(B, A, A + l, l, W);
+ tB = cB*dB;
+ if (cB) tB += mpn_addmul_1 (B + l, A + l, l, cB);
+ if (dB) tB += mpn_addmul_1 (B + l, A, l, dB);
+ ASSERT (tB < 49);
+ TOOM3_MUL_REC(A, a, b, l, W);
+ TOOM3_MUL_REC(E, a + l2, b + l2, ls, W);
+
+ /** Third stage: interpolation. **/
+ interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);
+
+ /** Final stage: add up the coefficients. **/
+ {
+ mp_limb_t i, x, y;
+ tB += mpn_add_n (p + l, p + l, B, l2);
+ tD += mpn_add_n (p + l3, p + l3, D, l2);
+ mpn_incr_u (p + l3, tB);
+ mpn_incr_u (p + l4, tC);
+ mpn_incr_u (p + l5, tD);
+ }
+}
+
+/*-- mpn_toom3_sqr_n --------------------------------------------------------------*/
+
+/* Like previous function but for squaring */
+
+#define TOOM3_SQR_REC(p, a, n, ws) \
+ do { \
+ if (n < KARATSUBA_SQR_THRESHOLD) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (n < TOOM3_SQR_THRESHOLD) \
+ mpn_kara_sqr_n (p, a, n, ws); \
+ else \
+ mpn_toom3_sqr_n (p, a, n, ws); \
+ } while (0)
+
+void
+#if __STDC__
+mpn_toom3_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
+#else
+mpn_toom3_sqr_n (p, a, n, ws)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_size_t n;
+ mp_ptr ws;
+#endif
+{
+ mp_limb_t cB,cC,cD, tB,tC,tD;
+ mp_limb_t *A,*B,*C,*D,*E, *W;
+ mp_size_t l,l2,l3,l4,l5,ls;
+
+ /* Break n words into chunks of size l, l and ls.
+ * n = 3*k => l = k, ls = k
+ * n = 3*k+1 => l = k+1, ls = k-1
+ * n = 3*k+2 => l = k+1, ls = k
+ */
+ {
+ mp_limb_t m;
+
+ ASSERT (n >= TOOM3_MUL_THRESHOLD);
+ l = ls = n / 3;
+ m = n - l * 3;
+ if (m != 0)
+ ++l;
+ if (m == 1)
+ --ls;
+
+ l2 = l * 2;
+ l3 = l * 3;
+ l4 = l * 4;
+ l5 = l * 5;
+ A = p;
+ B = ws;
+ C = p + l2;
+ D = ws + l2;
+ E = p + l4;
+ W = ws + l4;
+ }
+
+ /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/
+ evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
+
+ /** Second stage: pointwise multiplies. **/
+ TOOM3_SQR_REC(D, C, l, W);
+ tD = cD*cD;
+ if (cD) tD += mpn_addmul_1 (D + l, C, l, 2*cD);
+ ASSERT (tD < 49);
+ TOOM3_SQR_REC(C, B, l, W);
+ tC = cC*cC;
+ /* TO DO: choose one of the following alternatives. */
+#if 0
+ if (cC) tC += mpn_addmul_1 (C + l, B, l, 2*cC);
+#else
+ if (cC >= 1)
+ {
+ tC += add2Times (C + l, C + l, B, l);
+ if (cC == 2)
+ tC += add2Times (C + l, C + l, B, l);
+ }
+#endif
+ ASSERT (tC < 9);
+ TOOM3_SQR_REC(B, A, l, W);
+ tB = cB*cB;
+ if (cB) tB += mpn_addmul_1 (B + l, A, l, 2*cB);
+ ASSERT (tB < 49);
+ TOOM3_SQR_REC(A, a, l, W);
+ TOOM3_SQR_REC(E, a + l2, ls, W);
+
+ /** Third stage: interpolation. **/
+ interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);
+
+ /** Final stage: add up the coefficients. **/
+ {
+ mp_limb_t i, x, y;
+ tB += mpn_add_n (p + l, p + l, B, l2);
+ tD += mpn_add_n (p + l3, p + l3, D, l2);
+ mpn_incr_u (p + l3, tB);
+ mpn_incr_u (p + l4, tC);
+ mpn_incr_u (p + l5, tD);
+ }
+}
+
+void
+#if __STDC__
+mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
+#else
+mpn_mul_n (p, a, b, n)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_srcptr b;
+ mp_size_t n;
+#endif
+{
+ if (n < KARATSUBA_MUL_THRESHOLD)
+ mpn_mul_basecase (p, a, n, b, n);
+ else if (n < TOOM3_MUL_THRESHOLD)
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+#if TUNE_PROGRAM_BUILD
+ mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD_LIMIT-1) + 2 * BITS_PER_MP_LIMB];
+#else
+ mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD-1) + 2 * BITS_PER_MP_LIMB];
+#endif
+ mpn_kara_mul_n (p, a, b, n, ws);
+ }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else if (n < FFT_MUL_THRESHOLD)
+#else
+ else
+#endif
+ {
+ /* Use workspace of unknown size in heap, as stack space may
+ * be limited. Since n is at least TOOM3_MUL_THRESHOLD, the
+ * multiplication will take much longer than malloc()/free(). */
+ mp_limb_t wsLen, *ws;
+ wsLen = 2 * n + 3 * BITS_PER_MP_LIMB;
+ ws = (mp_ptr) (*_mp_allocate_func) ((size_t) wsLen * sizeof (mp_limb_t));
+ mpn_toom3_mul_n (p, a, b, n, ws);
+ (*_mp_free_func) (ws, (size_t) wsLen * sizeof (mp_limb_t));
+ }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else
+ {
+ mpn_mul_fft_full (p, a, n, b, n);
+ }
+#endif
+}
diff --git a/rts/gmp/mpn/generic/perfsqr.c b/rts/gmp/mpn/generic/perfsqr.c
new file mode 100644
index 0000000000..42ee3405d7
--- /dev/null
+++ b/rts/gmp/mpn/generic/perfsqr.c
@@ -0,0 +1,123 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+ zero otherwise.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue
+ modulo 0x100. */
+static unsigned char const sq_res_0x100[0x100] =
+{
+ 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+};
+
+int
+#if __STDC__
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+#else
+mpn_perfect_square_p (up, usize)
+ mp_srcptr up;
+ mp_size_t usize;
+#endif
+{
+ mp_limb_t rem;
+ mp_ptr root_ptr;
+ int res;
+ TMP_DECL (marker);
+
+ /* The first test excludes 55/64 (85.9%) of the perfect square candidates
+ in O(1) time. */
+ if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0)
+ return 0;
+
+#if defined (PP)
+ /* The second test excludes 30652543/30808063 (99.5%) of the remaining
+ perfect square candidates in O(n) time. */
+
+ /* Firstly, compute REM = A mod PP. */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6))
+ rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED);
+ else
+ rem = mpn_mod_1 (up, usize, (mp_limb_t) PP);
+
+ /* Now decide if REM is a quadratic residue modulo the factors in PP. */
+
+ /* If A is just a few limbs, computing the square root does not take long
+ time, so things might run faster if we limit this loop according to the
+ size of A. */
+
+#if BITS_PER_MP_LIMB == 64
+ if (((CNST_LIMB(0x12DD703303AED3) >> rem % 53) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x4351B2753DF) >> rem % 47) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x35883A3EE53) >> rem % 43) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x1B382B50737) >> rem % 41) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x165E211E9B) >> rem % 37) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x121D47B7) >> rem % 31) & 1) == 0)
+ return 0;
+#endif
+ if (((0x13D122F3L >> rem % 29) & 1) == 0)
+ return 0;
+ if (((0x5335FL >> rem % 23) & 1) == 0)
+ return 0;
+ if (((0x30AF3L >> rem % 19) & 1) == 0)
+ return 0;
+ if (((0x1A317L >> rem % 17) & 1) == 0)
+ return 0;
+ if (((0x161BL >> rem % 13) & 1) == 0)
+ return 0;
+ if (((0x23BL >> rem % 11) & 1) == 0)
+ return 0;
+ if (((0x017L >> rem % 7) & 1) == 0)
+ return 0;
+ if (((0x13L >> rem % 5) & 1) == 0)
+ return 0;
+ if (((0x3L >> rem % 3) & 1) == 0)
+ return 0;
+#endif
+
+ TMP_MARK (marker);
+
+ /* For the third and last test, we finally compute the square root,
+ to make sure we've really got a perfect square. */
+ root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);
+
+ /* Iff mpn_sqrtrem returns zero, the square is perfect. */
+ res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+ TMP_FREE (marker);
+ return res;
+}
diff --git a/rts/gmp/mpn/generic/popcount.c b/rts/gmp/mpn/generic/popcount.c
new file mode 100644
index 0000000000..387be9536d
--- /dev/null
+++ b/rts/gmp/mpn/generic/popcount.c
@@ -0,0 +1,93 @@
+/* popcount.c
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined __GNUC__
+/* No processor claiming to be SPARC v9 compliant seem to
+ implement the POPC instruction. Disable pattern for now. */
+#if 0 && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
+#define popc_limb(a) \
+ ({ \
+ DItype __res; \
+ asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \
+ __res; \
+ })
+#endif
+#endif
+
+#ifndef popc_limb
+
+/* Cool population count of a mp_limb_t.
+ You have to figure out how this works, I won't tell you! */
+
+static inline unsigned int
+#if __STDC__
+popc_limb (mp_limb_t x)
+#else
+popc_limb (x)
+ mp_limb_t x;
+#endif
+{
+#if BITS_PER_MP_LIMB == 64
+ /* We have to go into some trouble to define these constants.
+ (For mp_limb_t being `long long'.) */
+ mp_limb_t cnst;
+ cnst = 0xaaaaaaaaL | ((mp_limb_t) 0xaaaaaaaaL << BITS_PER_MP_LIMB/2);
+ x -= (x & cnst) >> 1;
+ cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2);
+ x = ((x & ~cnst) >> 2) + (x & cnst);
+ cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2);
+ x = ((x >> 4) + x) & cnst;
+ x = ((x >> 8) + x);
+ x = ((x >> 16) + x);
+ x = ((x >> 32) + x) & 0xff;
+#endif
+#if BITS_PER_MP_LIMB == 32
+ x -= (x & 0xaaaaaaaa) >> 1;
+ x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L);
+ x = ((x >> 4) + x) & 0x0f0f0f0fL;
+ x = ((x >> 8) + x);
+ x = ((x >> 16) + x) & 0xff;
+#endif
+ return x;
+}
+#endif
+
+unsigned long int
+#if __STDC__
+mpn_popcount (register mp_srcptr p, register mp_size_t size)
+#else
+mpn_popcount (p, size)
+ register mp_srcptr p;
+ register mp_size_t size;
+#endif
+{
+ unsigned long int popcnt;
+ mp_size_t i;
+
+ popcnt = 0;
+ for (i = 0; i < size; i++)
+ popcnt += popc_limb (p[i]);
+
+ return popcnt;
+}
diff --git a/rts/gmp/mpn/generic/pre_mod_1.c b/rts/gmp/mpn/generic/pre_mod_1.c
new file mode 100644
index 0000000000..27179683b3
--- /dev/null
+++ b/rts/gmp/mpn/generic/pre_mod_1.c
@@ -0,0 +1,69 @@
+/* mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb,
+ divisor_limb_inverted) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by the normalized DIVISOR_LIMB.
+ DIVISOR_LIMB_INVERTED should be 2^(2*BITS_PER_MP_LIMB) / DIVISOR_LIMB +
+ - 2^BITS_PER_MP_LIMB.
+ Return the single-limb remainder.
+
+Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+mp_limb_t
+#if __STDC__
+mpn_preinv_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb_t divisor_limb, mp_limb_t divisor_limb_inverted)
+#else
+mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, divisor_limb_inverted)
+ mp_srcptr dividend_ptr;
+ mp_size_t dividend_size;
+ mp_limb_t divisor_limb;
+ mp_limb_t divisor_limb_inverted;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t n0, r;
+ int dummy;
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (dummy, r, r, n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+}
diff --git a/rts/gmp/mpn/generic/random.c b/rts/gmp/mpn/generic/random.c
new file mode 100644
index 0000000000..dea4e20e56
--- /dev/null
+++ b/rts/gmp/mpn/generic/random.c
@@ -0,0 +1,43 @@
+/* mpn_random -- Generate random numbers.
+
+Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "urandom.h"
+
+void
+#if __STDC__
+mpn_random (mp_ptr res_ptr, mp_size_t size)
+#else
+mpn_random (res_ptr, size)
+ mp_ptr res_ptr;
+ mp_size_t size;
+#endif
+{
+ mp_size_t i;
+
+ for (i = 0; i < size; i++)
+ res_ptr[i] = urandom ();
+
+ /* Make sure the most significant limb is non-zero. */
+ while (res_ptr[size - 1] == 0)
+ res_ptr[size - 1] = urandom ();
+}
diff --git a/rts/gmp/mpn/generic/random2.c b/rts/gmp/mpn/generic/random2.c
new file mode 100644
index 0000000000..86682f81fa
--- /dev/null
+++ b/rts/gmp/mpn/generic/random2.c
@@ -0,0 +1,105 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+ of ones and zeroes. Suitable for border testing.
+
+Copyright (C) 1992, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined (__hpux) || defined (__alpha) || defined (__svr4__) || defined (__SVR4)
+/* HPUX lacks random(). DEC OSF/1 1.2 random() returns a double. */
+long mrand48 ();
+static inline long
+random ()
+{
+ return mrand48 ();
+}
+#elif defined(_WIN32) && !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+/* MS CRT supplies just the poxy rand(), with an upper bound of 0x7fff */
+static inline unsigned long
+random ()
+{
+ return rand () ^ (rand () << 16) ^ (rand() << 32);
+}
+
+#else
+long random ();
+#endif
+
+/* It's a bit tricky to get this right, so please test the code well
+ if you hack with it. Some early versions of the function produced
+ random numbers with the leading limb == 0, and some versions never
+ made the most significant bit set. */
+
+void
+#if __STDC__
+mpn_random2 (mp_ptr res_ptr, mp_size_t size)
+#else
+mpn_random2 (res_ptr, size)
+ mp_ptr res_ptr;
+ mp_size_t size;
+#endif
+{
+ int n_bits;
+ int bit_pos;
+ mp_size_t limb_pos;
+ unsigned int ran;
+ mp_limb_t limb;
+
+ limb = 0;
+
+ /* Start off in a random bit position in the most significant limb. */
+ bit_pos = random () & (BITS_PER_MP_LIMB - 1);
+
+ /* Least significant bit of RAN chooses string of ones/string of zeroes.
+ Make most significant limb be non-zero by setting bit 0 of RAN. */
+ ran = random () | 1;
+
+ for (limb_pos = size - 1; limb_pos >= 0; )
+ {
+ n_bits = (ran >> 1) % BITS_PER_MP_LIMB + 1;
+ if ((ran & 1) != 0)
+ {
+ /* Generate a string of ones. */
+ if (n_bits >= bit_pos)
+ {
+ res_ptr[limb_pos--] = limb | ((((mp_limb_t) 2) << bit_pos) - 1);
+ bit_pos += BITS_PER_MP_LIMB;
+ limb = (~(mp_limb_t) 0) << (bit_pos - n_bits);
+ }
+ else
+ {
+ limb |= ((((mp_limb_t) 1) << n_bits) - 1) << (bit_pos - n_bits + 1);
+ }
+ }
+ else
+ {
+ /* Generate a string of zeroes. */
+ if (n_bits >= bit_pos)
+ {
+ res_ptr[limb_pos--] = limb;
+ limb = 0;
+ bit_pos += BITS_PER_MP_LIMB;
+ }
+ }
+ bit_pos -= n_bits;
+ ran = random ();
+ }
+}
diff --git a/rts/gmp/mpn/generic/rshift.c b/rts/gmp/mpn/generic/rshift.c
new file mode 100644
index 0000000000..59caf73529
--- /dev/null
+++ b/rts/gmp/mpn/generic/rshift.c
@@ -0,0 +1,88 @@
+/* mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ and store the USIZE least significant limbs of the result at WP.
+ The bits shifted out to the right are returned.
+
+ Argument constraints:
+ 1. 0 < CNT < BITS_PER_MP_LIMB
+ 2. If the result is to be written over the input, WP must be <= UP.
+*/
+
+mp_limb_t
+#if __STDC__
+mpn_rshift (register mp_ptr wp,
+ register mp_srcptr up, mp_size_t usize,
+ register unsigned int cnt)
+#else
+mpn_rshift (wp, up, usize, cnt)
+ register mp_ptr wp;
+ register mp_srcptr up;
+ mp_size_t usize;
+ register unsigned int cnt;
+#endif
+{
+ register mp_limb_t high_limb, low_limb;
+ register unsigned sh_1, sh_2;
+ register mp_size_t i;
+ mp_limb_t retval;
+
+#ifdef DEBUG
+ if (usize == 0 || cnt == 0)
+ abort ();
+#endif
+
+ sh_1 = cnt;
+
+#if 0
+ if (sh_1 == 0)
+ {
+ if (wp != up)
+ {
+ /* Copy from low end to high end, to allow specified input/output
+ overlapping. */
+ for (i = 0; i < usize; i++)
+ wp[i] = up[i];
+ }
+ return usize;
+ }
+#endif
+
+ wp -= 1;
+ sh_2 = BITS_PER_MP_LIMB - sh_1;
+ high_limb = up[0];
+ retval = high_limb << sh_2;
+ low_limb = high_limb;
+
+ for (i = 1; i < usize; i++)
+ {
+ high_limb = up[i];
+ wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+ low_limb = high_limb;
+ }
+ wp[i] = low_limb >> sh_1;
+
+ return retval;
+}
diff --git a/rts/gmp/mpn/generic/sb_divrem_mn.c b/rts/gmp/mpn/generic/sb_divrem_mn.c
new file mode 100644
index 0000000000..a269e34f5f
--- /dev/null
+++ b/rts/gmp/mpn/generic/sb_divrem_mn.c
@@ -0,0 +1,201 @@
+/* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
+ quotient.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ the NSIZE-DSIZE least significant quotient limbs at QP
+ and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
+ non-zero, generate that many fraction bits and append them after the
+ other quotient limbs.
+ Return the most significant limb of the quotient, this is always 0 or 1.
+
+ Preconditions:
+ 0. NSIZE >= DSIZE.
+ 1. The most significant bit of the divisor must be set.
+ 2. QP must either not overlap with the input operands at all, or
+ QP + DSIZE >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.
+ 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ 4. DSIZE >= 2. */
+
+
+#define PREINVERT_VIABLE \
+ (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
+
+mp_limb_t
+#if __STDC__
+mpn_sb_divrem_mn (mp_ptr qp,
+ mp_ptr np, mp_size_t nsize,
+ mp_srcptr dp, mp_size_t dsize)
+#else
+mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_size_t nsize;
+ mp_srcptr dp;
+ mp_size_t dsize;
+#endif
+{
+ mp_limb_t most_significant_q_limb = 0;
+ mp_size_t i;
+ mp_limb_t dx, d1, n0;
+ mp_limb_t dxinv;
+ int have_preinv;
+
+ ASSERT_ALWAYS (dsize > 2);
+
+ np += nsize - dsize;
+ dx = dp[dsize - 1];
+ d1 = dp[dsize - 2];
+ n0 = np[dsize - 1];
+
+ if (n0 >= dx)
+ {
+ if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
+ {
+ mpn_sub_n (np, np, dp, dsize);
+ most_significant_q_limb = 1;
+ }
+ }
+
+ /* If multiplication is much faster than division, preinvert the
+ most significant divisor limb before entering the loop. */
+ if (PREINVERT_VIABLE)
+ {
+ have_preinv = 0;
+ if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
+ {
+ invert_limb (dxinv, dx);
+ have_preinv = 1;
+ }
+ }
+
+ for (i = nsize - dsize - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ mp_limb_t nx;
+ mp_limb_t cy_limb;
+
+ nx = np[dsize - 1];
+ np--;
+
+ if (nx == dx)
+ {
+ /* This might over-estimate q, but it's probably not worth
+ the extra code here to find out. */
+ q = ~(mp_limb_t) 0;
+
+#if 1
+ cy_limb = mpn_submul_1 (np, dp, dsize, q);
+#else
+ /* This should be faster on many machines */
+ cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
+ cy = mpn_add_n (np, np, dp, dsize);
+ np[dsize] += cy;
+#endif
+
+ if (nx != cy_limb)
+ {
+ mpn_add_n (np, np, dp, dsize);
+ q--;
+ }
+
+ qp[i] = q;
+ }
+ else
+ {
+ mp_limb_t rx, r1, r0, p1, p0;
+
+ /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
+ usage when np[dsize-1] is used in an asm statement like
+ umul_ppmm in udiv_qrnnd_preinv. The symptom is seg faults due
+ to registers being clobbered. gcc 2.95 i386 doesn't have the
+ problem. */
+ {
+ mp_limb_t workaround = np[dsize - 1];
+ if (PREINVERT_VIABLE && have_preinv)
+ udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
+ else
+ udiv_qrnnd (q, r1, nx, workaround, dx);
+ }
+ umul_ppmm (p1, p0, d1, q);
+
+ r0 = np[dsize - 2];
+ rx = 0;
+ if (r1 < p1 || (r1 == p1 && r0 < p0))
+ {
+ p1 -= p0 < d1;
+ p0 -= d1;
+ q--;
+ r1 += dx;
+ rx = r1 < dx;
+ }
+
+ p1 += r0 < p0; /* cannot carry! */
+ rx -= r1 < p1; /* may become 11..1 if q is still too large */
+ r1 -= p1;
+ r0 -= p0;
+
+ cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
+
+ {
+ mp_limb_t cy1, cy2;
+ cy1 = r0 < cy_limb;
+ r0 -= cy_limb;
+ cy2 = r1 < cy1;
+ r1 -= cy1;
+ np[dsize - 1] = r1;
+ np[dsize - 2] = r0;
+ if (cy2 != rx)
+ {
+ mpn_add_n (np, np, dp, dsize);
+ q--;
+ }
+ }
+ qp[i] = q;
+ }
+ }
+
+ /* ______ ______ ______
+ |__rx__|__r1__|__r0__| partial remainder
+ ______ ______
+ - |__p1__|__p0__| partial product to subtract
+ ______ ______
+ - |______|cylimb|
+
+ rx is -1, 0 or 1. If rx=1, then q is correct (it should match
+ carry out). If rx=-1 then q is too large. If rx=0, then q might
+ be too large, but it is most likely correct.
+ */
+
+ return most_significant_q_limb;
+}
diff --git a/rts/gmp/mpn/generic/scan0.c b/rts/gmp/mpn/generic/scan0.c
new file mode 100644
index 0000000000..96f05ce854
--- /dev/null
+++ b/rts/gmp/mpn/generic/scan0.c
@@ -0,0 +1,62 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Design issues:
+ 1. What if starting_bit is not within U? Caller's problem?
+ 2. Bit index should be 'unsigned'?
+
+ Argument constraints:
+ 1. U must sooner ot later have a limb with a clear bit.
+ */
+
+unsigned long int
+#if __STDC__
+mpn_scan0 (register mp_srcptr up,
+ register unsigned long int starting_bit)
+#else
+mpn_scan0 (up, starting_bit)
+ register mp_srcptr up;
+ register unsigned long int starting_bit;
+#endif
+{
+ mp_size_t starting_word;
+ mp_limb_t alimb;
+ int cnt;
+ mp_srcptr p;
+
+ /* Start at the word implied by STARTING_BIT. */
+ starting_word = starting_bit / BITS_PER_MP_LIMB;
+ p = up + starting_word;
+ alimb = ~*p++;
+
+ /* Mask off any bits before STARTING_BIT in the first limb. */
+ alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB);
+
+ while (alimb == 0)
+ alimb = ~*p++;
+
+ count_leading_zeros (cnt, alimb & -alimb);
+ return (p - up) * BITS_PER_MP_LIMB - 1 - cnt;
+}
diff --git a/rts/gmp/mpn/generic/scan1.c b/rts/gmp/mpn/generic/scan1.c
new file mode 100644
index 0000000000..98e2e0dcc0
--- /dev/null
+++ b/rts/gmp/mpn/generic/scan1.c
@@ -0,0 +1,62 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Design issues:
+ 1. What if starting_bit is not within U? Caller's problem?
+ 2. Bit index should be 'unsigned'?
+
+ Argument constraints:
+ 1. U must sooner ot later have a limb != 0.
+ */
+
+unsigned long int
+#if __STDC__
+mpn_scan1 (register mp_srcptr up,
+ register unsigned long int starting_bit)
+#else
+mpn_scan1 (up, starting_bit)
+ register mp_srcptr up;
+ register unsigned long int starting_bit;
+#endif
+{
+ mp_size_t starting_word;
+ mp_limb_t alimb;
+ int cnt;
+ mp_srcptr p;
+
+ /* Start at the word implied by STARTING_BIT. */
+ starting_word = starting_bit / BITS_PER_MP_LIMB;
+ p = up + starting_word;
+ alimb = *p++;
+
+ /* Mask off any bits before STARTING_BIT in the first limb. */
+ alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB);
+
+ while (alimb == 0)
+ alimb = *p++;
+
+ count_leading_zeros (cnt, alimb & -alimb);
+ return (p - up) * BITS_PER_MP_LIMB - 1 - cnt;
+}
diff --git a/rts/gmp/mpn/generic/set_str.c b/rts/gmp/mpn/generic/set_str.c
new file mode 100644
index 0000000000..e6ccc92154
--- /dev/null
+++ b/rts/gmp/mpn/generic/set_str.c
@@ -0,0 +1,159 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base)
+ -- Convert a STR_LEN long base BASE byte string pointed to by STR to a
+ limb vector pointed to by RES_PTR. Return the number of limbs in
+ RES_PTR.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_size_t
+#if __STDC__
+mpn_set_str (mp_ptr xp, const unsigned char *str, size_t str_len, int base)
+#else
+mpn_set_str (xp, str, str_len, base)
+ mp_ptr xp;
+ const unsigned char *str;
+ size_t str_len;
+ int base;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t big_base;
+ int indigits_per_limb;
+ mp_limb_t res_digit;
+
+ big_base = __mp_bases[base].big_base;
+ indigits_per_limb = __mp_bases[base].chars_per_limb;
+
+/* size = str_len / indigits_per_limb + 1; */
+
+ size = 0;
+
+ if ((base & (base - 1)) == 0)
+ {
+ /* The base is a power of 2. Read the input string from
+ least to most significant character/digit. */
+
+ const unsigned char *s;
+ int next_bitpos;
+ int bits_per_indigit = big_base;
+
+ res_digit = 0;
+ next_bitpos = 0;
+
+ for (s = str + str_len - 1; s >= str; s--)
+ {
+ int inp_digit = *s;
+
+ res_digit |= (mp_limb_t) inp_digit << next_bitpos;
+ next_bitpos += bits_per_indigit;
+ if (next_bitpos >= BITS_PER_MP_LIMB)
+ {
+ xp[size++] = res_digit;
+ next_bitpos -= BITS_PER_MP_LIMB;
+ res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+ }
+ }
+
+ if (res_digit != 0)
+ xp[size++] = res_digit;
+ }
+ else
+ {
+ /* General case. The base is not a power of 2. */
+
+ size_t i;
+ int j;
+ mp_limb_t cy_limb;
+
+ for (i = indigits_per_limb; i < str_len; i += indigits_per_limb)
+ {
+ res_digit = *str++;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = 1; j < indigits_per_limb; j++)
+ res_digit = res_digit * 10 + *str++;
+ }
+ else
+ {
+ for (j = 1; j < indigits_per_limb; j++)
+ res_digit = res_digit * base + *str++;
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ xp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+ cy_limb = mpn_mul_1 (xp, xp, size, big_base);
+ cy_limb += mpn_add_1 (xp, xp, size, res_digit);
+ if (cy_limb != 0)
+ xp[size++] = cy_limb;
+ }
+ }
+
+ big_base = base;
+ res_digit = *str++;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = 1; j < str_len - (i - indigits_per_limb); j++)
+ {
+ res_digit = res_digit * 10 + *str++;
+ big_base *= 10;
+ }
+ }
+ else
+ {
+ for (j = 1; j < str_len - (i - indigits_per_limb); j++)
+ {
+ res_digit = res_digit * base + *str++;
+ big_base *= base;
+ }
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ xp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+ cy_limb = mpn_mul_1 (xp, xp, size, big_base);
+ cy_limb += mpn_add_1 (xp, xp, size, res_digit);
+ if (cy_limb != 0)
+ xp[size++] = cy_limb;
+ }
+ }
+
+ return size;
+}
diff --git a/rts/gmp/mpn/generic/sqr_basecase.c b/rts/gmp/mpn/generic/sqr_basecase.c
new file mode 100644
index 0000000000..760258a3e0
--- /dev/null
+++ b/rts/gmp/mpn/generic/sqr_basecase.c
@@ -0,0 +1,83 @@
+/* mpn_sqr_basecase -- Internal routine to square two natural numbers
+ of length m and n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n)
+#else
+mpn_sqr_basecase (prodp, up, n)
+ mp_ptr prodp;
+ mp_srcptr up;
+ mp_size_t n;
+#endif
+{
+ mp_size_t i;
+
+ {
+ /* N.B.! We need the superfluous indirection through argh to work around
+ a reloader bug in GCC 2.7.*. */
+ mp_limb_t x;
+ mp_limb_t argh;
+ x = up[0];
+ umul_ppmm (argh, prodp[0], x, x);
+ prodp[1] = argh;
+ }
+ if (n > 1)
+ {
+ mp_limb_t tarr[2 * KARATSUBA_SQR_THRESHOLD];
+ mp_ptr tp = tarr;
+ mp_limb_t cy;
+
+ /* must fit 2*n limbs in tarr */
+ ASSERT (n <= KARATSUBA_SQR_THRESHOLD);
+
+ cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
+ tp[n - 1] = cy;
+ for (i = 2; i < n; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
+ tp[n + i - 2] = cy;
+ }
+ for (i = 1; i < n; i++)
+ {
+ mp_limb_t x;
+ x = up[i];
+ umul_ppmm (prodp[2 * i + 1], prodp[2 * i], x, x);
+ }
+ {
+ mp_limb_t cy;
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+ cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2);
+ prodp[2 * n - 1] += cy;
+ }
+ }
+}
diff --git a/rts/gmp/mpn/generic/sqrtrem.c b/rts/gmp/mpn/generic/sqrtrem.c
new file mode 100644
index 0000000000..ee3b5144dd
--- /dev/null
+++ b/rts/gmp/mpn/generic/sqrtrem.c
@@ -0,0 +1,509 @@
+/* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
+
+ Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR.
+ Write the remainder at REM_PTR, if REM_PTR != NULL.
+ Return the size of the remainder.
+ (The size of the root is always half of the size of the operand.)
+
+ OP_PTR and ROOT_PTR may not point to the same object.
+ OP_PTR and REM_PTR may point to the same object.
+
+ If REM_PTR is NULL, only the root is computed and the return value of
+ the function is 0 if OP is a perfect square, and *any* non-zero number
+ otherwise.
+
+Copyright (C) 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* This code is just correct if "unsigned char" has at least 8 bits. It
+ doesn't help to use CHAR_BIT from limits.h, as the real problem is
+ the static arrays. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Square root algorithm:
+
+ 1. Shift OP (the input) to the left an even number of bits s.t. there
+ are an even number of words and either (or both) of the most
+ significant bits are set. This way, sqrt(OP) has exactly half as
+ many words as OP, and has its most significant bit set.
+
+ 2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables.
+ This approximation is used for the first single-precision
+ iterations of Newton's method, yielding a full-word approximation
+ to sqrt(OP).
+
+ 3. Perform multiple-precision Newton iteration until we have the
+ exact result. Only about half of the input operand is used in
+ this calculation, as the square root is perfectly determinable
+ from just the higher half of a number. */
+
+/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */
+#if defined __GNUC__ && ! defined __SOFT_FLOAT
+
+#if defined (__sparc__) && BITS_PER_MP_LIMB == 32
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if defined (__HAVE_68881__)
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if defined (__hppa) && BITS_PER_MP_LIMB == 32
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if defined (_ARCH_PWR2) && BITS_PER_MP_LIMB == 32
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if 0
+#if defined (__i386__) || defined (__i486__)
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt" : "=t" (__sqrt_res) : "0" (a)); \
+ __sqrt_res; \
+ })
+#endif
+#endif
+
+#endif
+
+#ifndef SQRT
+
+/* Tables for initial approximation of the square root. These are
+ indexed with bits 1-8 of the operand for which the square root is
+ calculated, where bit 0 is the most significant non-zero bit. I.e.
+ the most significant one-bit is not used, since that per definition
+ is one. Likewise, the tables don't return the highest bit of the
+ result. That bit must be inserted by or:ing the returned value with
+ 0x100. This way, we get a 9-bit approximation from 8-bit tables! */
+
+/* Table to be used for operands with an even total number of bits.
+ (Exactly as in the decimal system there are similarities between the
+ square root of numbers with the same initial digits and an even
+ difference in the total number of digits. Consider the square root
+ of 1, 10, 100, 1000, ...) */
+static const unsigned char even_approx_tab[256] =
+{
+ 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
+ 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
+ 0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79,
+ 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84,
+ 0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89,
+ 0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f,
+ 0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94,
+ 0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99,
+ 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e,
+ 0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3,
+ 0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7,
+ 0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac,
+ 0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1,
+ 0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6,
+ 0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba,
+ 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf,
+ 0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3,
+ 0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8,
+ 0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc,
+ 0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1,
+ 0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5,
+ 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda,
+ 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde,
+ 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2,
+ 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6,
+ 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb,
+ 0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef,
+ 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3,
+ 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7,
+ 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb,
+ 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff,
+};
+
+/* Table to be used for operands with an odd total number of bits.
+ (Further comments before previous table.) */
+static const unsigned char odd_approx_tab[256] =
+{
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
+ 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
+ 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b,
+ 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f,
+ 0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12,
+ 0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16,
+ 0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a,
+ 0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d,
+ 0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21,
+ 0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24,
+ 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28,
+ 0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b,
+ 0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f,
+ 0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32,
+ 0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35,
+ 0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39,
+ 0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c,
+ 0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f,
+ 0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42,
+ 0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45,
+ 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49,
+ 0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c,
+ 0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f,
+ 0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52,
+ 0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55,
+ 0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58,
+ 0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b,
+ 0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e,
+ 0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61,
+ 0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63,
+ 0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66,
+ 0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69,
+};
+#endif
+
+
+mp_size_t
+#if __STDC__
+mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size)
+#else
+mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
+ mp_ptr root_ptr;
+ mp_ptr rem_ptr;
+ mp_srcptr op_ptr;
+ mp_size_t op_size;
+#endif
+{
+ /* R (root result) */
+ mp_ptr rp; /* Pointer to least significant word */
+ mp_size_t rsize; /* The size in words */
+
+ /* T (OP shifted to the left a.k.a. normalized) */
+ mp_ptr tp; /* Pointer to least significant word */
+ mp_size_t tsize; /* The size in words */
+ mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */
+ mp_limb_t t_high0, t_high1; /* The two most significant words */
+
+ /* TT (temporary for numerator/remainder) */
+ mp_ptr ttp; /* Pointer to least significant word */
+
+ /* X (temporary for quotient in main loop) */
+ mp_ptr xp; /* Pointer to least significant word */
+ mp_size_t xsize; /* The size in words */
+
+ unsigned cnt;
+ mp_limb_t initial_approx; /* Initially made approximation */
+ mp_size_t tsizes[BITS_PER_MP_LIMB]; /* Successive calculation precisions */
+ mp_size_t tmp;
+ mp_size_t i;
+
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+
+ /* If OP is zero, both results are zero. */
+ if (op_size == 0)
+ return 0;
+
+ count_leading_zeros (cnt, op_ptr[op_size - 1]);
+ tsize = op_size;
+ if ((tsize & 1) != 0)
+ {
+ cnt += BITS_PER_MP_LIMB;
+ tsize++;
+ }
+
+ rsize = tsize / 2;
+ rp = root_ptr;
+
+ TMP_MARK (marker);
+
+ /* Shift OP an even number of bits into T, such that either the most or
+ the second most significant bit is set, and such that the number of
+ words in T becomes even. This way, the number of words in R=sqrt(OP)
+ is exactly half as many as in OP, and the most significant bit of R
+ is set.
+
+ Also, the initial approximation is simplified by this up-shifted OP.
+
+ Finally, the Newtonian iteration which is the main part of this
+ program performs division by R. The fast division routine expects
+ the divisor to be "normalized" in exactly the sense of having the
+ most significant bit set. */
+
+ tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+ if ((cnt & ~1) % BITS_PER_MP_LIMB != 0)
+ t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size,
+ (cnt & ~1) % BITS_PER_MP_LIMB);
+ else
+ MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size);
+
+ if (cnt >= BITS_PER_MP_LIMB)
+ tp[0] = 0;
+
+ t_high0 = tp[tsize - 1];
+ t_high1 = tp[tsize - 2]; /* Never stray. TSIZE is >= 2. */
+
+/* Is there a fast sqrt instruction defined for this machine? */
+#ifdef SQRT
+ {
+ initial_approx = SQRT (t_high0 * MP_BASE_AS_DOUBLE + t_high1);
+ /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
+ become incorrect due to overflow in the conversion from double to
+ mp_limb_t above. It will typically be zero in that case, but might be
+ a small number on some machines. The most significant bit of
+ INITIAL_APPROX should be set, so that bit is a good overflow
+ indication. */
+ if ((mp_limb_signed_t) initial_approx >= 0)
+ initial_approx = ~(mp_limb_t)0;
+ }
+#else
+ /* Get a 9 bit approximation from the tables. The tables expect to
+ be indexed with the 8 high bits right below the highest bit.
+ Also, the highest result bit is not returned by the tables, and
+ must be or:ed into the result. The scheme gives 9 bits of start
+ approximation with just 256-entry 8 bit tables. */
+
+ if ((cnt & 1) == 0)
+ {
+ /* The most significant bit of t_high0 is set. */
+ initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
+ initial_approx &= 0xff;
+ initial_approx = even_approx_tab[initial_approx];
+ }
+ else
+ {
+ /* The most significant bit of t_high0 is unset,
+ the second most significant is set. */
+ initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
+ initial_approx &= 0xff;
+ initial_approx = odd_approx_tab[initial_approx];
+ }
+ initial_approx |= 0x100;
+ initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
+
+ /* Perform small precision Newtonian iterations to get a full word
+ approximation. For small operands, these iterations will do the
+ entire job. */
+ if (t_high0 == ~(mp_limb_t)0)
+ initial_approx = t_high0;
+ else
+ {
+ mp_limb_t quot;
+
+ if (t_high0 >= initial_approx)
+ initial_approx = t_high0 + 1;
+
+ /* First get about 18 bits with pure C arithmetics. */
+ quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2;
+ initial_approx = (initial_approx + quot) / 2;
+ initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+
+ /* Now get a full word by one (or for > 36 bit machines) several
+ iterations. */
+ for (i = 18; i < BITS_PER_MP_LIMB; i <<= 1)
+ {
+ mp_limb_t ignored_remainder;
+
+ udiv_qrnnd (quot, ignored_remainder,
+ t_high0, t_high1, initial_approx);
+ initial_approx = (initial_approx + quot) / 2;
+ initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+ }
+ }
+#endif
+
+ rp[0] = initial_approx;
+ rsize = 1;
+
+#ifdef SQRT_DEBUG
+ printf ("\n\nT = ");
+ mpn_dump (tp, tsize);
+#endif
+
+ if (tsize > 2)
+ {
+ /* Determine the successive precisions to use in the iteration. We
+ minimize the precisions, beginning with the highest (i.e. last
+ iteration) to the lowest (i.e. first iteration). */
+
+ xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+ ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+ t_end_ptr = tp + tsize;
+
+ tmp = tsize / 2;
+ for (i = 0;; i++)
+ {
+ tsize = (tmp + 1) / 2;
+ if (tmp == tsize)
+ break;
+ tsizes[i] = tsize + tmp;
+ tmp = tsize;
+ }
+
+ /* Main Newton iteration loop. For big arguments, most of the
+ time is spent here. */
+
+ /* It is possible to do a great optimization here. The successive
+ divisors in the mpn_divmod call below have more and more leading
+ words equal to its predecessor. Therefore the beginning of
+ each division will repeat the same work as did the last
+ division. If we could guarantee that the leading words of two
+ consecutive divisors are the same (i.e. in this case, a later
+ divisor has just more digits at the end) it would be a simple
+ matter of just using the old remainder of the last division in
+ a subsequent division, to take care of this optimization. This
+ idea would surely make a difference even for small arguments. */
+
+ /* Loop invariants:
+
+ R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1.
+ X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X.
+ R <= shiftdown_to_same_size(X). */
+
+ while (--i >= 0)
+ {
+ mp_limb_t cy;
+#ifdef SQRT_DEBUG
+ mp_limb_t old_least_sign_r = rp[0];
+ mp_size_t old_rsize = rsize;
+
+ printf ("R = ");
+ mpn_dump (rp, rsize);
+#endif
+ tsize = tsizes[i];
+
+ /* Need to copy the numerator into temporary space, as
+ mpn_divmod overwrites its numerator argument with the
+ remainder (which we currently ignore). */
+ MPN_COPY (ttp, t_end_ptr - tsize, tsize);
+ cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
+ xsize = tsize - rsize;
+
+#ifdef SQRT_DEBUG
+ printf ("X =%d ", cy);
+ mpn_dump (xp, xsize);
+#endif
+
+ /* Add X and R with the most significant limbs aligned,
+ temporarily ignoring at least one limb at the low end of X. */
+ tmp = xsize - rsize;
+ cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize);
+
+ /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get
+ intermediate roots that'd need an extra bit. We don't want to
+ handle that since it would make the subsequent divisor
+ non-normalized, so round such roots down to be only ones in the
+ current precision. */
+ if (cy == 2)
+ {
+ mp_size_t j;
+ for (j = xsize; j >= 0; j--)
+ xp[j] = ~(mp_limb_t)0;
+ }
+
+ /* Divide X by 2 and put the result in R. This is the new
+ approximation. Shift in the carry from the addition. */
+ mpn_rshift (rp, xp, xsize, 1);
+ rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+ rsize = xsize;
+#ifdef SQRT_DEBUG
+ if (old_least_sign_r != rp[rsize - old_rsize])
+ printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
+ i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
+ 2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]);
+#endif
+ }
+ }
+
+#ifdef SQRT_DEBUG
+ printf ("(final) R = ");
+ mpn_dump (rp, rsize);
+#endif
+
+ /* We computed the square root of OP * 2**(2*floor(cnt/2)).
+ This has resulted in R being 2**floor(cnt/2) to large.
+ Shift it down here to fix that. */
+ if (cnt / 2 != 0)
+ {
+ mpn_rshift (rp, rp, rsize, cnt/2);
+ rsize -= rp[rsize - 1] == 0;
+ }
+
+ /* Calculate the remainder. */
+ mpn_mul_n (tp, rp, rp, rsize);
+ tsize = rsize + rsize;
+ tsize -= tp[tsize - 1] == 0;
+ if (op_size < tsize
+ || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0))
+ {
+ /* R is too large. Decrement it. */
+
+ /* These operations can't overflow. */
+ cy_limb = mpn_sub_n (tp, tp, rp, rsize);
+ cy_limb += mpn_sub_n (tp, tp, rp, rsize);
+ mpn_decr_u (tp + rsize, cy_limb);
+ mpn_incr_u (tp, (mp_limb_t) 1);
+
+ mpn_decr_u (rp, (mp_limb_t) 1);
+
+#ifdef SQRT_DEBUG
+ printf ("(adjusted) R = ");
+ mpn_dump (rp, rsize);
+#endif
+ }
+
+ if (rem_ptr != NULL)
+ {
+ cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize);
+ MPN_NORMALIZE (rem_ptr, op_size);
+ TMP_FREE (marker);
+ return op_size;
+ }
+ else
+ {
+ int res;
+ res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size);
+ TMP_FREE (marker);
+ return res;
+ }
+}
diff --git a/rts/gmp/mpn/generic/sub_n.c b/rts/gmp/mpn/generic/sub_n.c
new file mode 100644
index 0000000000..4f2f06099c
--- /dev/null
+++ b/rts/gmp/mpn/generic/sub_n.c
@@ -0,0 +1,62 @@
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_srcptr s2_ptr;
+ mp_size_t size;
+#endif
+{
+ register mp_limb_t x, y, cy;
+ register mp_size_t j;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do
+ {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to subtrahend */
+ cy = (y < cy); /* get out carry from that addition */
+ y = x - y; /* main subtract */
+ cy = (y > x) + cy; /* get out carry from the subtract, combine */
+ res_ptr[j] = y;
+ }
+ while (++j != 0);
+
+ return cy;
+}
diff --git a/rts/gmp/mpn/generic/submul_1.c b/rts/gmp/mpn/generic/submul_1.c
new file mode 100644
index 0000000000..c7c08ee4af
--- /dev/null
+++ b/rts/gmp/mpn/generic/submul_1.c
@@ -0,0 +1,65 @@
+/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+ by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
+ from the limb vector pointed to by RES_PTR. Return the most significant
+ limb of the product, adjusted for carry-out from the subtraction.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t cy_limb;
+ register mp_size_t j;
+ register mp_limb_t prod_high, prod_low;
+ register mp_limb_t x;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do
+ {
+ umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x - prod_low;
+ cy_limb += (prod_low > x);
+ res_ptr[j] = prod_low;
+ }
+ while (++j != 0);
+
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/tdiv_qr.c b/rts/gmp/mpn/generic/tdiv_qr.c
new file mode 100644
index 0000000000..b748b5d810
--- /dev/null
+++ b/rts/gmp/mpn/generic/tdiv_qr.c
@@ -0,0 +1,401 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+ write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
+ qxn is non-zero, generate that many fraction limbs and append them after the
+ other quotient limbs, and update the remainder accordningly. The input
+ operands are unaffected.
+
+ Preconditions:
+ 1. The most significant limb of of the divisor must be non-zero.
+ 2. No argument overlap is permitted. (??? relax this ???)
+ 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
+
+ The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+ complexity of multiplication.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
+#endif
+
+/* Extract the middle limb from ((h,,l) << cnt) */
+#define SHL(h,l,cnt) \
+ ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
+
+void
+#if __STDC__
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+ mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+#else
+mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_ptr rp;
+ mp_size_t qxn;
+ mp_srcptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+#endif
+{
+ /* FIXME:
+ 1. qxn
+ 2. pass allocated storage in additional parameter?
+ */
+ if (qxn != 0)
+ abort ();
+
+ switch (dn)
+ {
+ case 0:
+ DIVIDE_BY_ZERO;
+
+ case 1:
+ {
+ rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
+ return;
+ }
+
+ case 2:
+ {
+ int cnt;
+ mp_ptr n2p, d2p;
+ mp_limb_t qhl, cy;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ count_leading_zeros (cnt, dp[dn - 1]);
+ if (cnt != 0)
+ {
+ d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+ if (cy == 0)
+ qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
+ }
+ else
+ {
+ d2p = (mp_ptr) dp;
+ n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np, nn);
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
+ }
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ TMP_FREE (marker);
+ return;
+ }
+
+ default:
+ {
+ int adjust;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
+ if (nn + adjust >= 2 * dn)
+ {
+ mp_ptr n2p, d2p;
+ mp_limb_t cy;
+ int cnt;
+ count_leading_zeros (cnt, dp[dn - 1]);
+
+ qp[nn - dn] = 0; /* zero high quotient limb */
+ if (cnt != 0) /* normalize divisor if needed */
+ {
+ d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ nn += adjust;
+ }
+ else
+ {
+ d2p = (mp_ptr) dp;
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np, nn);
+ n2p[nn] = 0;
+ nn += adjust;
+ }
+
+ if (dn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ else if (dn < BZ_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
+ else
+ {
+ /* Perform 2*dn / dn limb divisions as long as the limbs
+ in np last. */
+ mp_ptr q2p = qp + nn - 2 * dn;
+ n2p += nn - 2 * dn;
+ mpn_bz_divrem_n (q2p, n2p, d2p, dn);
+ nn -= dn;
+ while (nn >= 2 * dn)
+ {
+ mp_limb_t c;
+ q2p -= dn; n2p -= dn;
+ c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
+ ASSERT_ALWAYS (c == 0);
+ nn -= dn;
+ }
+
+ if (nn != dn)
+ {
+ n2p -= nn - dn;
+ /* In theory, we could fall out to the cute code below
+ since we now have exactly the situation that code
+ is designed to handle. We botch this badly and call
+ the basic mpn_sb_divrem_mn! */
+ if (dn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ else
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
+ }
+ }
+
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ TMP_FREE (marker);
+ return;
+ }
+
+ /* When we come here, the numerator/partial remainder is less
+ than twice the size of the denominator. */
+
+ {
+ /* Problem:
+
+ Divide a numerator N with nn limbs by a denominator D with dn
+ limbs forming a quotient of nn-dn+1 limbs. When qn is small
+ compared to dn, conventional division algorithms perform poorly.
+ We want an algorithm that has an expected running time that is
+ dependent only on qn. It is assumed that the most significant
+ limb of the numerator is smaller than the most significant limb
+ of the denominator.
+
+ Algorithm (very informally stated):
+
+ 1) Divide the 2 x qn most significant limbs from the numerator
+ by the qn most significant limbs from the denominator. Call
+ the result qest. This is either the correct quotient, but
+ might be 1 or 2 too large. Compute the remainder from the
+ division. (This step is implemented by a mpn_divrem call.)
+
+ 2) Is the most significant limb from the remainder < p, where p
+ is the product of the most significant limb from the quotient
+ and the next(d). (Next(d) denotes the next ignored limb from
+ the denominator.) If it is, decrement qest, and adjust the
+ remainder accordingly.
+
+ 3) Is the remainder >= qest? If it is, qest is the desired
+ quotient. The algorithm terminates.
+
+ 4) Subtract qest x next(d) from the remainder. If there is
+ borrow out, decrement qest, and adjust the remainder
+ accordingly.
+
+ 5) Skip one word from the denominator (i.e., let next(d) denote
+ the next less significant limb. */
+
+ mp_size_t qn;
+ mp_ptr n2p, d2p;
+ mp_ptr tp;
+ mp_limb_t cy;
+ mp_size_t in, rn;
+ mp_limb_t quotient_too_large;
+ int cnt;
+
+ qn = nn - dn;
+ qp[qn] = 0; /* zero high quotient limb */
+ qn += adjust; /* qn cannot become bigger */
+
+ if (qn == 0)
+ {
+ MPN_COPY (rp, np, dn);
+ TMP_FREE (marker);
+ return;
+ }
+
+ in = dn - qn; /* (at least partially) ignored # of limbs in ops */
+ /* Normalize denominator by shifting it to the left such that its
+ most significant bit is set. Then shift the numerator the same
+ amount, to mathematically preserve quotient. */
+ count_leading_zeros (cnt, dp[dn - 1]);
+ if (cnt != 0)
+ {
+ d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
+
+ mpn_lshift (d2p, dp + in, qn, cnt);
+ d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
+
+ n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+ if (adjust)
+ {
+ n2p[2 * qn] = cy;
+ n2p++;
+ }
+ else
+ {
+ n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
+ }
+ }
+ else
+ {
+ d2p = (mp_ptr) dp + in;
+
+ n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+ if (adjust)
+ {
+ n2p[2 * qn] = 0;
+ n2p++;
+ }
+ }
+
+ /* Get an approximate quotient using the extracted operands. */
+ if (qn == 1)
+ {
+ mp_limb_t q0, r0;
+ mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
+ /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
+ temps here. This doesn't hurt code quality on any machines
+ so we do it unconditionally. */
+ gcc272bug_n1 = n2p[1];
+ gcc272bug_n0 = n2p[0];
+ gcc272bug_d0 = d2p[0];
+ udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
+ n2p[0] = r0;
+ qp[0] = q0;
+ }
+ else if (qn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
+ else if (qn < BZ_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
+ else
+ mpn_bz_divrem_n (qp, n2p, d2p, qn);
+
+ rn = qn;
+ /* Multiply the first ignored divisor limb by the most significant
+ quotient limb. If that product is > the partial remainder's
+ most significant limb, we know the quotient is too large. This
+ test quickly catches most cases where the quotient is too large;
+ it catches all cases where the quotient is 2 too large. */
+ {
+ mp_limb_t dl, x;
+ mp_limb_t h, l;
+
+ if (in - 2 < 0)
+ dl = 0;
+ else
+ dl = dp[in - 2];
+
+ x = SHL (dp[in - 1], dl, cnt);
+ umul_ppmm (h, l, x, qp[qn - 1]);
+
+ if (n2p[qn - 1] < h)
+ {
+ mp_limb_t cy;
+
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ cy = mpn_add_n (n2p, n2p, d2p, qn);
+ if (cy)
+ {
+ /* The partial remainder is safely large. */
+ n2p[qn] = cy;
+ ++rn;
+ }
+ }
+ }
+
+ quotient_too_large = 0;
+ if (cnt != 0)
+ {
+ mp_limb_t cy1, cy2;
+
+ /* Append partially used numerator limb to partial remainder. */
+ cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
+ n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
+
+ /* Update partial remainder with partially used divisor limb. */
+ cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
+ if (qn != rn)
+ {
+ if (n2p[qn] < cy2)
+ abort ();
+ n2p[qn] -= cy2;
+ }
+ else
+ {
+ n2p[qn] = cy1 - cy2;
+
+ quotient_too_large = (cy1 < cy2);
+ ++rn;
+ }
+ --in;
+ }
+ /* True: partial remainder now is neutral, i.e., it is not shifted up. */
+
+ tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+
+ if (in < qn)
+ {
+ if (in == 0)
+ {
+ MPN_COPY (rp, n2p, rn);
+ if (rn != dn)
+ abort ();
+ goto foo;
+ }
+ mpn_mul (tp, qp, qn, dp, in);
+ }
+ else
+ mpn_mul (tp, dp, in, qp, qn);
+
+ cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+ MPN_COPY (rp + in, n2p, dn - in);
+ quotient_too_large |= cy;
+ cy = mpn_sub_n (rp, np, tp, in);
+ cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+ quotient_too_large |= cy;
+ foo:
+ if (quotient_too_large)
+ {
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ mpn_add_n (rp, rp, dp, dn);
+ }
+ }
+ TMP_FREE (marker);
+ return;
+ }
+ }
+}
diff --git a/rts/gmp/mpn/generic/udiv_w_sdiv.c b/rts/gmp/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 0000000000..061cce86e1
--- /dev/null
+++ b/rts/gmp/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,131 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+ division.
+
+ Contributed by Peter L. Montgomery.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY SAFE
+ TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE
+ GNU MP RELEASE.
+
+
+Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (rp, a1, a0, d)
+ mp_limb_t *rp, a1, a0, d;
+{
+ mp_limb_t q, r;
+ mp_limb_t c0, c1, b1;
+
+ if ((mp_limb_signed_t) d >= 0)
+ {
+ if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
+ {
+ /* dividend, divisor, and quotient are nonnegative */
+ sdiv_qrnnd (q, r, a1, a0, d);
+ }
+ else
+ {
+ /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+ sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
+ /* Divide (c1*2^32 + c0) by d */
+ sdiv_qrnnd (q, r, c1, c0, d);
+ /* Add 2^31 to quotient */
+ q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+ }
+ }
+ else
+ {
+ b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */
+ c1 = a1 >> 1; /* A/2 */
+ c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
+
+ if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */
+ {
+ sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+ r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */
+ if ((d & 1) != 0)
+ {
+ if (r >= q)
+ r = r - q;
+ else if (q - r <= d)
+ {
+ r = r - q + d;
+ q--;
+ }
+ else
+ {
+ r = r - q + 2*d;
+ q -= 2;
+ }
+ }
+ }
+ else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */
+ {
+ c1 = (b1 - 1) - c1;
+ c0 = ~c0; /* logical NOT */
+
+ sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+ q = ~q; /* (A/2)/b1 */
+ r = (b1 - 1) - r;
+
+ r = 2*r + (a0 & 1); /* A/(2*b1) */
+
+ if ((d & 1) != 0)
+ {
+ if (r >= q)
+ r = r - q;
+ else if (q - r <= d)
+ {
+ r = r - q + d;
+ q--;
+ }
+ else
+ {
+ r = r - q + 2*d;
+ q -= 2;
+ }
+ }
+ }
+ else /* Implies c1 = b1 */
+ { /* Hence a1 = d - 1 = 2*b1 - 1 */
+ if (a0 >= -d)
+ {
+ q = -1;
+ r = a0 + d;
+ }
+ else
+ {
+ q = -2;
+ r = a0 + 2*d;
+ }
+ }
+ }
+
+ *rp = r;
+ return q;
+}
diff --git a/rts/gmp/mpn/hppa/README b/rts/gmp/mpn/hppa/README
new file mode 100644
index 0000000000..97e7abe011
--- /dev/null
+++ b/rts/gmp/mpn/hppa/README
@@ -0,0 +1,91 @@
+This directory contains mpn functions for various HP PA-RISC chips. Code
+that runs faster on the PA7100 and later implementations, is in the pa7100
+directory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+ Load and Store timing
+
+On the PA7000 no memory instructions can issue the two cycles after a store.
+For the PA7100, this is reduced to one cycle.
+
+The PA7100 has a lookup-free cache, so it helps to schedule loads and the
+dependent instruction really far from each other.
+
+STATUS
+
+1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
+ instructions below (but some sw pipelining is needed to avoid the
+ xmpyu-fstds delay):
+
+ fldds s1_ptr
+
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+
+ addc
+ stws res_ptr
+ addc
+ stws res_ptr
+
+ addib Loop
+
+2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
+ (asymptotically) on the PA7100, using the instructions below. With proper
+ sw pipelining and the unrolling level below, the speed becomes 8
+ cycles/limb.
+
+ fldds s1_ptr
+ fldds s1_ptr
+
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ addc
+ addc
+ addc
+ addc
+ addc %r0,%r0,cy-limb
+
+ ldws res_ptr
+ ldws res_ptr
+ ldws res_ptr
+ ldws res_ptr
+ add
+ stws res_ptr
+ addc
+ stws res_ptr
+ addc
+ stws res_ptr
+ addc
+ stws res_ptr
+
+ addib
+
+3. For the PA8000 we have to stick to using 32-bit limbs before compiler
+ support emerges. But we want to use 64-bit operations whenever possible,
+ in particular for loads and stores. It is possible to handle mpn_add_n
+ efficiently by rotating (when s1/s2 are aligned), masking+bit field
+ inserting when (they are not). The speed should double compared to the
+ code used today.
diff --git a/rts/gmp/mpn/hppa/add_n.s b/rts/gmp/mpn/hppa/add_n.s
new file mode 100644
index 0000000000..c53b2f71b3
--- /dev/null
+++ b/rts/gmp/mpn/hppa/add_n.s
@@ -0,0 +1,58 @@
+; HP-PA __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; One might want to unroll this as for other processors, but it turns
+; out that the data cache contention after a store makes such
+; unrolling useless. We can't come under 5 cycles/limb anyway.
+
+ .code
+ .export __gmpn_add_n
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,= -1,%r23,L$end ; check for (SIZE == 1)
+ add %r20,%r19,%r28 ; add first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,<> -1,%r23,L$loop
+ addc %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r0,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/gmp-mparam.h b/rts/gmp/mpn/hppa/gmp-mparam.h
new file mode 100644
index 0000000000..98b6d9ce3c
--- /dev/null
+++ b/rts/gmp/mpn/hppa/gmp-mparam.h
@@ -0,0 +1,63 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the PA7100 using GCC. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 30
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 172
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 59
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 185
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 96
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 122
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 18
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 33
+#endif
diff --git a/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s b/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s
new file mode 100644
index 0000000000..c7d218f922
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s
@@ -0,0 +1,102 @@
+; HP-PA-1.1 __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r26
+; s1_ptr r25
+; size r24
+; s2_limb r23
+
+; This runs at 11 cycles/limb on a PA7000. With the used instructions, it
+; can not become faster due to data cache contention after a store. On the
+; PA7100 it runs at 10 cycles/limb, and that can not be improved either,
+; since only the xmpyu does not need the integer pipeline, so the only
+; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
+; on the PA7100.
+
+; There are some ideas described in mul_1.s that applies to this code too.
+
+ .code
+ .export __gmpn_addmul_1
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) ; move s2_limb ...
+ addib,= -1,%r24,L$just_one_limb
+ fldws -16(%r30),%fr4 ; ... into fr4
+ add %r0,%r0,%r0 ; clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 ; least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L$end
+ ldw -12(%r30),%r1
+
+; Main loop
+L$loop ldws 0(%r26),%r29
+ fldws,ma 4(%r25),%fr5
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addc %r0,%r28,%r28
+ addib,<> -1,%r24,L$loop
+ ldw -12(%r30),%r1
+
+L$end ldw 0(%r26),%r29
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ ldws 0(%r26),%r29
+ addc %r0,%r28,%r28
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+L$just_one_limb
+ xmpyu %fr4,%fr5,%fr6
+ ldw 0(%r26),%r29
+ fstds %fr6,-16(%r30)
+ ldw -12(%r30),%r1
+ ldw -16(%r30),%r28
+ add %r29,%r1,%r19
+ stw %r19,0(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/mul_1.s b/rts/gmp/mpn/hppa/hppa1_1/mul_1.s
new file mode 100644
index 0000000000..4512fddec9
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/mul_1.s
@@ -0,0 +1,98 @@
+; HP-PA-1.1 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r26
+; s1_ptr r25
+; size r24
+; s2_limb r23
+
+; This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
+; not become faster due to data cache contention after a store. On the
+; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
+; only the xmpyu does not need the integer pipeline, so the only dual-issue
+; we will get are addc+xmpyu. Unrolling would not help either CPU.
+
+; We could use fldds to read two limbs at a time from the S1 array, and that
+; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
+; PA7100, respectively. We don't do that since it does not seem worth the
+; (alignment) troubles...
+
+; At least the PA7100 is rumored to be able to deal with cache-misses
+; without stalling instruction issue. If this is true, and the cache is
+; actually also lockup-free, we should use a deeper software pipeline, and
+; load from S1 very early! (The loads and stores to -12(sp) will surely be
+; in the cache.)
+
+ .code
+ .export __gmpn_mul_1
+__gmpn_mul_1
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) ; move s2_limb ...
+ addib,= -1,%r24,L$just_one_limb
+ fldws -16(%r30),%fr4 ; ... into fr4
+ add %r0,%r0,%r0 ; clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 ; least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L$end
+ ldw -12(%r30),%r1
+
+; Main loop
+L$loop fldws,ma 4(%r25),%fr5
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addib,<> -1,%r24,L$loop
+ ldw -12(%r30),%r1
+
+L$end stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ stws,ma %r19,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+L$just_one_limb
+ xmpyu %fr4,%fr5,%fr6
+ fstds %fr6,-16(%r30)
+ ldw -16(%r30),%r28
+ ldo -64(%r30),%r30
+ bv 0(%r2)
+ fstws %fr6R,0(%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s
new file mode 100644
index 0000000000..4f4be08b37
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s
@@ -0,0 +1,75 @@
+; HP-PA __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+; This is optimized for the PA7100, where is runs at 4.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+ .code
+ .export __gmpn_add_n
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,<= -5,%r23,L$rest
+ add %r20,%r19,%r28 ; add first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -4,%r23,L$loop
+ addc %r20,%r19,%r28
+
+L$rest addib,= 4,%r23,L$end
+ nop
+L$eloop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -1,%r23,L$eloop
+ addc %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r0,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S b/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S
new file mode 100644
index 0000000000..04db06822e
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S
@@ -0,0 +1,189 @@
+; HP-PA 7100/7200 __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define res_ptr %r26
+#define s1_ptr %r25
+#define size %r24
+#define s2_limb %r23
+
+#define cylimb %r28
+#define s0 %r19
+#define s1 %r20
+#define s2 %r3
+#define s3 %r4
+#define lo0 %r21
+#define lo1 %r5
+#define lo2 %r6
+#define lo3 %r7
+#define hi0 %r22
+#define hi1 %r23 /* safe to reuse */
+#define hi2 %r29
+#define hi3 %r1
+
+ .code
+ .export __gmpn_addmul_1
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+
+ ldo 128(%r30),%r30
+ stws s2_limb,-16(%r30)
+ add %r0,%r0,cylimb ; clear cy and cylimb
+ addib,< -4,size,L$few_limbs
+ fldws -16(%r30),%fr31R
+
+ ldo -112(%r30),%r31
+ stw %r3,-96(%r30)
+ stw %r4,-92(%r30)
+ stw %r5,-88(%r30)
+ stw %r6,-84(%r30)
+ stw %r7,-80(%r30)
+
+ bb,>=,n s1_ptr,29,L$0
+
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r31)
+ ldws -16(%r31),cylimb
+ ldws -12(%r31),lo0
+ add s0,lo0,s0
+ addib,< -1,size,L$few_limbs
+ stws,ma s0,4(res_ptr)
+
+; start software pipeline ----------------------------------------------------
+L$0 fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ xmpyu %fr4L,%fr31R,%fr5
+ xmpyu %fr4R,%fr31R,%fr6
+ xmpyu %fr8L,%fr31R,%fr9
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ fstds %fr6,-8(%r31)
+ fstds %fr9,0(%r31)
+ fstds %fr10,8(%r31)
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ addc lo1,hi0,lo1
+ addc lo2,hi1,lo2
+ addc lo3,hi2,lo3
+
+ addib,< -4,size,L$end
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; main loop ------------------------------------------------------------------
+L$loop fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ ldws 0(res_ptr),s0
+ xmpyu %fr4L,%fr31R,%fr5
+ ldws 4(res_ptr),s1
+ xmpyu %fr4R,%fr31R,%fr6
+ ldws 8(res_ptr),s2
+ xmpyu %fr8L,%fr31R,%fr9
+ ldws 12(res_ptr),s3
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ add s0,lo0,s0
+ fstds %fr6,-8(%r31)
+ addc s1,lo1,s1
+ fstds %fr9,0(%r31)
+ addc s2,lo2,s2
+ fstds %fr10,8(%r31)
+ addc s3,lo3,s3
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ stws,ma s0,4(res_ptr)
+ addc lo1,hi0,lo1
+ stws,ma s1,4(res_ptr)
+ addc lo2,hi1,lo2
+ stws,ma s2,4(res_ptr)
+ addc lo3,hi2,lo3
+ stws,ma s3,4(res_ptr)
+
+ addib,>= -4,size,L$loop
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; finish software pipeline ---------------------------------------------------
+L$end ldws 0(res_ptr),s0
+ ldws 4(res_ptr),s1
+ ldws 8(res_ptr),s2
+ ldws 12(res_ptr),s3
+
+ add s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ addc s1,lo1,s1
+ stws,ma s1,4(res_ptr)
+ addc s2,lo2,s2
+ stws,ma s2,4(res_ptr)
+ addc s3,lo3,s3
+ stws,ma s3,4(res_ptr)
+
+; restore callee-saves registers ---------------------------------------------
+ ldw -96(%r30),%r3
+ ldw -92(%r30),%r4
+ ldw -88(%r30),%r5
+ ldw -84(%r30),%r6
+ ldw -80(%r30),%r7
+
+L$few_limbs
+ addib,=,n 4,size,L$ret
+L$loop2 fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r30)
+ ldws -16(%r30),hi0
+ ldws -12(%r30),lo0
+ addc lo0,cylimb,lo0
+ addc %r0,hi0,cylimb
+ add s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ addib,<> -1,size,L$loop2
+ nop
+
+L$ret addc %r0,cylimb,cylimb
+ bv 0(%r2)
+ ldo -128(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s
new file mode 100644
index 0000000000..31669b1a55
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s
@@ -0,0 +1,83 @@
+; HP-PA __gmpn_lshift --
+; This is optimized for the PA7100, where is runs at 3.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_lshift
+__gmpn_lshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ sh2add %r24,%r25,%r25
+ sh2add %r24,%r26,%r26
+ ldws,mb -4(0,%r25),%r22
+ subi 32,%r23,%r1
+ mtsar %r1
+ addib,= -1,%r24,L$0004
+ vshd %r0,%r22,%r28 ; compute carry out limb
+ ldws,mb -4(0,%r25),%r29
+ addib,<= -5,%r24,L$rest
+ vshd %r22,%r29,%r20
+
+L$loop ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,> -4,%r24,L$loop
+ vshd %r22,%r29,%r20
+
+L$rest addib,= 4,%r24,L$end1
+ nop
+L$eloop ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ addib,<= -1,%r24,L$end2
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,> -1,%r24,L$eloop
+ vshd %r22,%r29,%r20
+
+L$end1 stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+L$end2 stws,mb %r20,-4(0,%r26)
+L$0004 vshd %r22,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s
new file mode 100644
index 0000000000..d32b10b4b1
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s
@@ -0,0 +1,80 @@
+; HP-PA __gmpn_rshift --
+; This is optimized for the PA7100, where is runs at 3.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_rshift
+__gmpn_rshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L$0004
+ vshd %r22,%r0,%r28 ; compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,<= -5,%r24,L$rest
+ vshd %r29,%r22,%r20
+
+L$loop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -4,%r24,L$loop
+ vshd %r29,%r22,%r20
+
+L$rest addib,= 4,%r24,L$end1
+ nop
+L$eloop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,<= -1,%r24,L$end2
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -1,%r24,L$eloop
+ vshd %r29,%r22,%r20
+
+L$end1 stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+L$end2 stws,ma %r20,4(0,%r26)
+L$0004 vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s
new file mode 100644
index 0000000000..0eec41c4b3
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s
@@ -0,0 +1,76 @@
+; HP-PA __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+; This is optimized for the PA7100, where is runs at 4.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+ .code
+ .export __gmpn_sub_n
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,<= -5,%r23,L$rest
+ sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -4,%r23,L$loop
+ subb %r20,%r19,%r28
+
+L$rest addib,= 4,%r23,L$end
+ nop
+L$eloop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -1,%r23,L$eloop
+ subb %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ addc %r0,%r0,%r28
+ bv 0(%r2)
+ subi 1,%r28,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S b/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S
new file mode 100644
index 0000000000..0fba21dcef
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S
@@ -0,0 +1,195 @@
+; HP-PA 7100/7200 __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define res_ptr %r26
+#define s1_ptr %r25
+#define size %r24
+#define s2_limb %r23
+
+#define cylimb %r28
+#define s0 %r19
+#define s1 %r20
+#define s2 %r3
+#define s3 %r4
+#define lo0 %r21
+#define lo1 %r5
+#define lo2 %r6
+#define lo3 %r7
+#define hi0 %r22
+#define hi1 %r23 /* safe to reuse */
+#define hi2 %r29
+#define hi3 %r1
+
+ .code
+ .export __gmpn_submul_1
+__gmpn_submul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+
+ ldo 128(%r30),%r30
+ stws s2_limb,-16(%r30)
+ add %r0,%r0,cylimb ; clear cy and cylimb
+ addib,< -4,size,L$few_limbs
+ fldws -16(%r30),%fr31R
+
+ ldo -112(%r30),%r31
+ stw %r3,-96(%r30)
+ stw %r4,-92(%r30)
+ stw %r5,-88(%r30)
+ stw %r6,-84(%r30)
+ stw %r7,-80(%r30)
+
+ bb,>=,n s1_ptr,29,L$0
+
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r31)
+ ldws -16(%r31),cylimb
+ ldws -12(%r31),lo0
+ sub s0,lo0,s0
+ add s0,lo0,%r0 ; invert cy
+ addib,< -1,size,L$few_limbs
+ stws,ma s0,4(res_ptr)
+
+; start software pipeline ----------------------------------------------------
+L$0 fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ xmpyu %fr4L,%fr31R,%fr5
+ xmpyu %fr4R,%fr31R,%fr6
+ xmpyu %fr8L,%fr31R,%fr9
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ fstds %fr6,-8(%r31)
+ fstds %fr9,0(%r31)
+ fstds %fr10,8(%r31)
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ addc lo1,hi0,lo1
+ addc lo2,hi1,lo2
+ addc lo3,hi2,lo3
+
+ addib,< -4,size,L$end
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; main loop ------------------------------------------------------------------
+L$loop fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ ldws 0(res_ptr),s0
+ xmpyu %fr4L,%fr31R,%fr5
+ ldws 4(res_ptr),s1
+ xmpyu %fr4R,%fr31R,%fr6
+ ldws 8(res_ptr),s2
+ xmpyu %fr8L,%fr31R,%fr9
+ ldws 12(res_ptr),s3
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ sub s0,lo0,s0
+ fstds %fr6,-8(%r31)
+ subb s1,lo1,s1
+ fstds %fr9,0(%r31)
+ subb s2,lo2,s2
+ fstds %fr10,8(%r31)
+ subb s3,lo3,s3
+ subb %r0,%r0,lo0 ; these two insns ...
+ add lo0,lo0,%r0 ; ... just invert cy
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ stws,ma s0,4(res_ptr)
+ addc lo1,hi0,lo1
+ stws,ma s1,4(res_ptr)
+ addc lo2,hi1,lo2
+ stws,ma s2,4(res_ptr)
+ addc lo3,hi2,lo3
+ stws,ma s3,4(res_ptr)
+
+ addib,>= -4,size,L$loop
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; finish software pipeline ---------------------------------------------------
+L$end ldws 0(res_ptr),s0
+ ldws 4(res_ptr),s1
+ ldws 8(res_ptr),s2
+ ldws 12(res_ptr),s3
+
+ sub s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ subb s1,lo1,s1
+ stws,ma s1,4(res_ptr)
+ subb s2,lo2,s2
+ stws,ma s2,4(res_ptr)
+ subb s3,lo3,s3
+ stws,ma s3,4(res_ptr)
+ subb %r0,%r0,lo0 ; these two insns ...
+ add lo0,lo0,%r0 ; ... invert cy
+
+; restore callee-saves registers ---------------------------------------------
+ ldw -96(%r30),%r3
+ ldw -92(%r30),%r4
+ ldw -88(%r30),%r5
+ ldw -84(%r30),%r6
+ ldw -80(%r30),%r7
+
+L$few_limbs
+ addib,=,n 4,size,L$ret
+L$loop2 fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r30)
+ ldws -16(%r30),hi0
+ ldws -12(%r30),lo0
+ addc lo0,cylimb,lo0
+ addc %r0,hi0,cylimb
+ sub s0,lo0,s0
+ add s0,lo0,%r0 ; invert cy
+ stws,ma s0,4(res_ptr)
+ addib,<> -1,size,L$loop2
+ nop
+
+L$ret addc %r0,cylimb,cylimb
+ bv 0(%r2)
+ ldo -128(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/submul_1.s b/rts/gmp/mpn/hppa/hppa1_1/submul_1.s
new file mode 100644
index 0000000000..20a5b5ce0a
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/submul_1.s
@@ -0,0 +1,111 @@
+; HP-PA-1.1 __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r26
+; s1_ptr r25
+; size r24
+; s2_limb r23
+
+; This runs at 12 cycles/limb on a PA7000. With the used instructions, it
+; can not become faster due to data cache contention after a store. On the
+; PA7100 it runs at 11 cycles/limb, and that can not be improved either,
+; since only the xmpyu does not need the integer pipeline, so the only
+; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
+; on the PA7100.
+
+; There are some ideas described in mul_1.s that applies to this code too.
+
+; It seems possible to make this run as fast as __gmpn_addmul_1, if we use
+; sub,>>= %r29,%r19,%r22
+; addi 1,%r28,%r28
+; but that requires reworking the hairy software pipeline...
+
+ .code
+ .export __gmpn_submul_1
+__gmpn_submul_1
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) ; move s2_limb ...
+ addib,= -1,%r24,L$just_one_limb
+ fldws -16(%r30),%fr4 ; ... into fr4
+ add %r0,%r0,%r0 ; clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 ; least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L$end
+ ldw -12(%r30),%r1
+
+; Main loop
+L$loop ldws 0(%r26),%r29
+ fldws,ma 4(%r25),%fr5
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addc %r0,%r28,%r28
+ addib,<> -1,%r24,L$loop
+ ldw -12(%r30),%r1
+
+L$end ldw 0(%r26),%r29
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ ldws 0(%r26),%r29
+ addc %r0,%r28,%r28
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+L$just_one_limb
+ xmpyu %fr4,%fr5,%fr6
+ ldw 0(%r26),%r29
+ fstds %fr6,-16(%r30)
+ ldw -12(%r30),%r1
+ ldw -16(%r30),%r28
+ sub %r29,%r1,%r22
+ add %r22,%r1,%r0
+ stw %r22,0(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S b/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S
new file mode 100644
index 0000000000..b83d6f4dd2
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S
@@ -0,0 +1,80 @@
+; HP-PA __udiv_qrnnd division support, used from longlong.h.
+; This version runs fast on PA 7000 and later.
+
+; Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; rem_ptr gr26
+; n1 gr25
+; n0 gr24
+; d gr23
+
+ .code
+L$0000 .word 0x43f00000 ; 2^64
+ .word 0x0
+ .export __gmpn_udiv_qrnnd
+__gmpn_udiv_qrnnd
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+ ldo 64(%r30),%r30
+
+ stws %r25,-16(0,%r30) ; n_hi
+ stws %r24,-12(0,%r30) ; n_lo
+#ifdef PIC
+ addil LT%L$0000,%r19
+ ldo RT%L$0000(%r1),%r19
+#else
+ ldil L%L$0000,%r19
+ ldo R%L$0000(%r19),%r19
+#endif
+ fldds -16(0,%r30),%fr5
+ stws %r23,-12(0,%r30)
+ comib,<= 0,%r25,L$1
+ fcnvxf,dbl,dbl %fr5,%fr5
+ fldds 0(0,%r19),%fr4
+ fadd,dbl %fr4,%fr5,%fr5
+L$1
+ fcpy,sgl %fr0,%fr6L
+ fldws -12(0,%r30),%fr6R
+ fcnvxf,dbl,dbl %fr6,%fr4
+
+ fdiv,dbl %fr5,%fr4,%fr5
+
+ fcnvfx,dbl,dbl %fr5,%fr4
+ fstws %fr4R,-16(%r30)
+ xmpyu %fr4R,%fr6R,%fr6
+ ldws -16(%r30),%r28
+ fstds %fr6,-16(0,%r30)
+ ldws -12(0,%r30),%r21
+ ldws -16(0,%r30),%r20
+ sub %r24,%r21,%r22
+ subb %r25,%r20,%r19
+ comib,= 0,%r19,L$2
+ ldo -64(%r30),%r30
+
+ add %r22,%r23,%r22
+ ldo -1(%r28),%r28
+L$2 bv 0(%r2)
+ stws %r22,0(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/umul.s b/rts/gmp/mpn/hppa/hppa1_1/umul.s
new file mode 100644
index 0000000000..1f1300ac9b
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/umul.s
@@ -0,0 +1,42 @@
+; Copyright (C) 1999 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+ .code
+ .export __umul_ppmm
+ .align 4
+__umul_ppmm
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ stw %r25,-16(0,%r30)
+ fldws -16(0,%r30),%fr22R
+ stw %r24,-16(0,%r30)
+ fldws -16(0,%r30),%fr22L
+ xmpyu %fr22R,%fr22L,%fr22
+ fstds %fr22,-16(0,%r30)
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ stw %r29,0(0,%r26)
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa2_0/add_n.s b/rts/gmp/mpn/hppa/hppa2_0/add_n.s
new file mode 100644
index 0000000000..6e97278a39
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa2_0/add_n.s
@@ -0,0 +1,88 @@
+; HP-PA 2.0 32-bit __gmpn_add_n -- Add two limb vectors of the same length > 0
+; and store sum in a third limb vector.
+
+; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .code
+ .export __gmpn_add_n
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ sub %r0,%r23,%r22
+ zdep %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ zdep %r22,29,3,%r22 ; r22 = 4 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldw 0(%r25),%r20
+ ldw 0(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,0(%r26)
+L$7 ldw 4(%r25),%r21
+ ldw 4(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,4(%r26)
+L$6 ldw 8(%r25),%r20
+ ldw 8(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,8(%r26)
+L$5 ldw 12(%r25),%r21
+ ldw 12(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,12(%r26)
+L$4 ldw 16(%r25),%r20
+ ldw 16(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,16(%r26)
+L$3 ldw 20(%r25),%r21
+ ldw 20(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,20(%r26)
+L$2 ldw 24(%r25),%r20
+ ldw 24(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,24(%r26)
+L$1 ldw 28(%r25),%r21
+ ldo 32(%r25),%r25
+ ldw 28(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,28(%r26)
+ ldo 32(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 32(%r26),%r26
+
+ bv (%r2)
+ .exit
+ addc %r0,%r0,%r28
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa2_0/sub_n.s b/rts/gmp/mpn/hppa/hppa2_0/sub_n.s
new file mode 100644
index 0000000000..7d9b50fc27
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa2_0/sub_n.s
@@ -0,0 +1,88 @@
+; HP-PA 2.0 32-bit __gmpn_sub_n -- Subtract two limb vectors of the same
+; length > 0 and store difference in a third limb vector.
+
+; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .code
+ .export __gmpn_sub_n
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ sub %r0,%r23,%r22
+ zdep %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ zdep %r22,29,3,%r22 ; r22 = 4 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry
+
+L$loop ldw 0(%r25),%r20
+ ldw 0(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,0(%r26)
+L$7 ldw 4(%r25),%r21
+ ldw 4(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,4(%r26)
+L$6 ldw 8(%r25),%r20
+ ldw 8(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,8(%r26)
+L$5 ldw 12(%r25),%r21
+ ldw 12(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,12(%r26)
+L$4 ldw 16(%r25),%r20
+ ldw 16(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,16(%r26)
+L$3 ldw 20(%r25),%r21
+ ldw 20(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,20(%r26)
+L$2 ldw 24(%r25),%r20
+ ldw 24(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,24(%r26)
+L$1 ldw 28(%r25),%r21
+ ldo 32(%r25),%r25
+ ldw 28(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,28(%r26)
+ ldo 32(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 32(%r26),%r26
+
+ addc %r0,%r0,%r28
+ bv (%r2)
+ .exit
+ subi 1,%r28,%r28
+ .procend
diff --git a/rts/gmp/mpn/hppa/lshift.s b/rts/gmp/mpn/hppa/lshift.s
new file mode 100644
index 0000000000..f5a2daad60
--- /dev/null
+++ b/rts/gmp/mpn/hppa/lshift.s
@@ -0,0 +1,66 @@
+; HP-PA __gmpn_lshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_lshift
+__gmpn_lshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ sh2add %r24,%r25,%r25
+ sh2add %r24,%r26,%r26
+ ldws,mb -4(0,%r25),%r22
+ subi 32,%r23,%r1
+ mtsar %r1
+ addib,= -1,%r24,L$0004
+ vshd %r0,%r22,%r28 ; compute carry out limb
+ ldws,mb -4(0,%r25),%r29
+ addib,= -1,%r24,L$0002
+ vshd %r22,%r29,%r20
+
+L$loop ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ addib,= -1,%r24,L$0003
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,<> -1,%r24,L$loop
+ vshd %r22,%r29,%r20
+
+L$0002 stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+L$0003 stws,mb %r20,-4(0,%r26)
+L$0004 vshd %r22,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/rshift.s b/rts/gmp/mpn/hppa/rshift.s
new file mode 100644
index 0000000000..e05e2f10b5
--- /dev/null
+++ b/rts/gmp/mpn/hppa/rshift.s
@@ -0,0 +1,63 @@
+; HP-PA __gmpn_rshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_rshift
+__gmpn_rshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L$0004
+ vshd %r22,%r0,%r28 ; compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,= -1,%r24,L$0002
+ vshd %r29,%r22,%r20
+
+L$loop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,= -1,%r24,L$0003
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,<> -1,%r24,L$loop
+ vshd %r29,%r22,%r20
+
+L$0002 stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+L$0003 stws,ma %r20,4(0,%r26)
+L$0004 vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/sub_n.s b/rts/gmp/mpn/hppa/sub_n.s
new file mode 100644
index 0000000000..8f770ad1ad
--- /dev/null
+++ b/rts/gmp/mpn/hppa/sub_n.s
@@ -0,0 +1,59 @@
+; HP-PA __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; One might want to unroll this as for other processors, but it turns
+; out that the data cache contention after a store makes such
+; unrolling useless. We can't come under 5 cycles/limb anyway.
+
+ .code
+ .export __gmpn_sub_n
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,= -1,%r23,L$end ; check for (SIZE == 1)
+ sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,<> -1,%r23,L$loop
+ subb %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ addc %r0,%r0,%r28
+ bv 0(%r2)
+ subi 1,%r28,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/udiv_qrnnd.s b/rts/gmp/mpn/hppa/udiv_qrnnd.s
new file mode 100644
index 0000000000..9aa3b8a830
--- /dev/null
+++ b/rts/gmp/mpn/hppa/udiv_qrnnd.s
@@ -0,0 +1,286 @@
+; HP-PA __udiv_qrnnd division support, used from longlong.h.
+; This version runs fast on pre-PA7000 CPUs.
+
+; Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; rem_ptr gr26
+; n1 gr25
+; n0 gr24
+; d gr23
+
+; The code size is a bit excessive. We could merge the last two ds;addc
+; sequences by simply moving the "bb,< Odd" instruction down. The only
+; trouble is the FFFFFFFF code that would need some hacking.
+
+ .code
+ .export __gmpn_udiv_qrnnd
+__gmpn_udiv_qrnnd
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ comb,< %r23,0,L$largedivisor
+ sub %r0,%r23,%r1 ; clear cy as side-effect
+ ds %r0,%r1,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r28
+ ds %r25,%r23,%r25
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r23,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r28,%r28,%r28
+
+L$largedivisor
+ extru %r24,31,1,%r19 ; r19 = n0 & 1
+ bb,< %r23,31,L$odd
+ extru %r23,30,31,%r22 ; r22 = d >> 1
+ shd %r25,%r24,1,%r24 ; r24 = new n0
+ extru %r25,30,31,%r25 ; r25 = new n1
+ sub %r0,%r22,%r21
+ ds %r0,%r21,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r22,%r25
+ sh1addl %r25,%r19,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r24,%r24,%r28
+
+L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
+ shd %r25,%r24,1,%r24 ; r24 = new n0
+ extru %r25,30,31,%r25 ; r25 = new n1
+ sub %r0,%r22,%r21
+ ds %r0,%r21,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r28
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r22,%r25
+ sh1addl %r25,%r19,%r25
+; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
+ add,nuv %r28,%r25,%r25
+ addl %r25,%r1,%r25
+ addc %r0,%r28,%r28
+ sub,<< %r25,%r23,%r0
+ addl %r25,%r1,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r28,%r28
+
+; This is just a special case of the code above.
+; We come here when d == 0xFFFFFFFF
+L$FF.. add,uv %r25,%r24,%r24
+ sub,<< %r24,%r23,%r0
+ ldo 1(%r24),%r24
+ stws %r24,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r25,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/i960/README b/rts/gmp/mpn/i960/README
new file mode 100644
index 0000000000..d68a0a83eb
--- /dev/null
+++ b/rts/gmp/mpn/i960/README
@@ -0,0 +1,9 @@
+This directory contains mpn functions for Intel i960 processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The code in this directory is not well optimized.
+
+STATUS
+
+The code in this directory has not been tested.
diff --git a/rts/gmp/mpn/i960/add_n.s b/rts/gmp/mpn/i960/add_n.s
new file mode 100644
index 0000000000..387317a397
--- /dev/null
+++ b/rts/gmp/mpn/i960/add_n.s
@@ -0,0 +1,43 @@
+# I960 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_add_n
+___gmpn_add_n:
+ mov 0,g6 # clear carry-save register
+ cmpo 1,0 # clear cy
+
+Loop: subo 1,g3,g3 # update loop counter
+ ld (g1),g5 # load from s1_ptr
+ addo 4,g1,g1 # s1_ptr++
+ ld (g2),g4 # load from s2_ptr
+ addo 4,g2,g2 # s2_ptr++
+ cmpo g6,1 # restore cy from g6, relies on cy being 0
+ addc g4,g5,g4 # main add
+ subc 0,0,g6 # save cy in g6
+ st g4,(g0) # store result to res_ptr
+ addo 4,g0,g0 # res_ptr++
+ cmpobne 0,g3,Loop # when branch is taken, clears C bit
+
+ mov g6,g0
+ ret
diff --git a/rts/gmp/mpn/i960/addmul_1.s b/rts/gmp/mpn/i960/addmul_1.s
new file mode 100644
index 0000000000..7df1418356
--- /dev/null
+++ b/rts/gmp/mpn/i960/addmul_1.s
@@ -0,0 +1,48 @@
+# I960 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_mul_1
+___gmpn_mul_1:
+ subo g2,0,g2
+ shlo 2,g2,g4
+ subo g4,g1,g1
+ subo g4,g0,g13
+ mov 0,g0
+
+ cmpo 1,0 # clear C bit on AC.cc
+
+Loop: ld (g1)[g2*4],g5
+ emul g3,g5,g6
+ ld (g13)[g2*4],g5
+
+ addc g0,g6,g6 # relies on that C bit is clear
+ addc 0,g7,g7
+ addc g5,g6,g6 # relies on that C bit is clear
+ st g6,(g13)[g2*4]
+ addc 0,g7,g0
+
+ addo g2,1,g2
+ cmpobne 0,g2,Loop # when branch is taken, clears C bit
+
+ ret
diff --git a/rts/gmp/mpn/i960/mul_1.s b/rts/gmp/mpn/i960/mul_1.s
new file mode 100644
index 0000000000..5c0c985aa5
--- /dev/null
+++ b/rts/gmp/mpn/i960/mul_1.s
@@ -0,0 +1,45 @@
+# I960 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_mul_1
+___gmpn_mul_1:
+ subo g2,0,g2
+ shlo 2,g2,g4
+ subo g4,g1,g1
+ subo g4,g0,g13
+ mov 0,g0
+
+ cmpo 1,0 # clear C bit on AC.cc
+
+Loop: ld (g1)[g2*4],g5
+ emul g3,g5,g6
+
+ addc g0,g6,g6 # relies on that C bit is clear
+ st g6,(g13)[g2*4]
+ addc 0,g7,g0
+
+ addo g2,1,g2
+ cmpobne 0,g2,Loop # when branch is taken, clears C bit
+
+ ret
diff --git a/rts/gmp/mpn/i960/sub_n.s b/rts/gmp/mpn/i960/sub_n.s
new file mode 100644
index 0000000000..2db2d46aad
--- /dev/null
+++ b/rts/gmp/mpn/i960/sub_n.s
@@ -0,0 +1,43 @@
+# I960 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_sub_n
+___gmpn_sub_n:
+ mov 1,g6 # set carry-save register
+ cmpo 1,0 # clear cy
+
+Loop: subo 1,g3,g3 # update loop counter
+ ld (g1),g5 # load from s1_ptr
+ addo 4,g1,g1 # s1_ptr++
+ ld (g2),g4 # load from s2_ptr
+ addo 4,g2,g2 # s2_ptr++
+ cmpo g6,1 # restore cy from g6, relies on cy being 0
+ subc g4,g5,g4 # main subtract
+ subc 0,0,g6 # save cy in g6
+ st g4,(g0) # store result to res_ptr
+ addo 4,g0,g0 # res_ptr++
+ cmpobne 0,g3,Loop # when branch is taken, cy will be 0
+
+ mov g6,g0
+ ret
diff --git a/rts/gmp/mpn/lisp/gmpasm-mode.el b/rts/gmp/mpn/lisp/gmpasm-mode.el
new file mode 100644
index 0000000000..5d9da7fa1f
--- /dev/null
+++ b/rts/gmp/mpn/lisp/gmpasm-mode.el
@@ -0,0 +1,351 @@
+;;; gmpasm-mode.el -- GNU MP asm and m4 editing mode.
+
+
+;; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+;;
+;; This file is part of the GNU MP Library.
+;;
+;; The GNU MP Library is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU Lesser General Public License as published by
+;; the Free Software Foundation; either version 2.1 of the License, or (at your
+;; option) any later version.
+;;
+;; The GNU MP Library is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU Lesser General Public License
+;; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+;; MA 02111-1307, USA.
+
+
+;;; Commentary:
+;;
+;; gmpasm-mode is an editing mode for m4 processed assembler code and m4
+;; macro files in GMP. It's similar to m4-mode, but has a number of
+;; settings better suited to GMP.
+;;
+;;
+;; Install
+;; -------
+;;
+;; To make M-x gmpasm-mode available, put gmpasm-mode.el somewhere in the
+;; load-path and the following in .emacs
+;;
+;; (autoload 'gmpasm-mode "gmpasm-mode" nil t)
+;;
+;; To use gmpasm-mode automatically on all .asm and .m4 files, put the
+;; following in .emacs
+;;
+;; (add-to-list 'auto-mode-alist '("\\.asm\\'" . gmpasm-mode))
+;; (add-to-list 'auto-mode-alist '("\\.m4\\'" . gmpasm-mode))
+;;
+;; To have gmpasm-mode only on gmp files, try instead something like the
+;; following, which uses it only in a directory starting with "gmp", or a
+;; sub-directory of such.
+;;
+;; (add-to-list 'auto-mode-alist
+;; '("/gmp.*/.*\\.\\(asm\\|m4\\)\\'" . gmpasm-mode))
+;;
+;; Byte compiling will slightly speed up loading. If you want a docstring
+;; in the autoload you can use M-x update-file-autoloads if you set it up
+;; right.
+;;
+;;
+;; Emacsen
+;; -------
+;;
+;; FSF Emacs 20.x - gmpasm-mode is designed for this.
+;; XEmacs 20.x - seems to work.
+;;
+;; FSF Emacs 19.x - should work if replacements for some 20.x-isms are
+;; available. comment-region with "C" won't really do the right thing
+;; though.
+
+
+;;; Code:
+
+(defgroup gmpasm nil
+ "GNU MP m4 and asm editing."
+ :prefix "gmpasm-"
+ :group 'languages)
+
+(defcustom gmpasm-mode-hook nil
+ "*Hook called by `gmpasm-mode'."
+ :type 'hook
+ :group 'gmpasm)
+
+(defcustom gmpasm-comment-start-regexp "[#;!@C]"
+ "*Regexp matching possible comment styles.
+See `gmpasm-mode' docstring for how this is used."
+ :type 'regexp
+ :group 'gmpasm)
+
+
+(defun gmpasm-add-to-list-second (list-var element)
+ "(gmpasm-add-to-list-second LIST-VAR ELEMENT)
+
+Add ELEMENT to LIST-VAR as the second element in the list, if it isn't
+already in the list. If LIST-VAR is nil, then ELEMENT is just added as the
+sole element in the list.
+
+This is like `add-to-list', but it puts the new value second in the list.
+
+The first cons cell is copied rather than changed in-place, so references to
+the list elsewhere won't be affected."
+
+ (if (member element (symbol-value list-var))
+ (symbol-value list-var)
+ (set list-var
+ (if (symbol-value list-var)
+ (cons (car (symbol-value list-var))
+ (cons element
+ (cdr (symbol-value list-var))))
+ (list element)))))
+
+
+(defun gmpasm-delete-from-list (list-var element)
+ "(gmpasm-delete-from-list LIST-VAR ELEMENT)
+
+Delete ELEMENT from LIST-VAR, using `delete'.
+This is like `add-to-list', but the element is deleted from the list.
+The list is copied rather than changed in-place, so references to it elsewhere
+won't be affected."
+
+ (set list-var (delete element (copy-sequence (symbol-value list-var)))))
+
+
+(defvar gmpasm-mode-map
+ (let ((map (make-sparse-keymap)))
+
+ ;; assembler and dnl commenting
+ (define-key map "\C-c\C-c" 'comment-region)
+ (define-key map "\C-c\C-d" 'gmpasm-comment-region-dnl)
+
+ ;; kill an M-x compile, since it's not hard to put m4 into an infinite
+ ;; loop
+ (define-key map "\C-c\C-k" 'kill-compilation)
+
+ map)
+ "Keymap for `gmpasm-mode'.")
+
+
+(defvar gmpasm-mode-syntax-table
+ (let ((table (make-syntax-table)))
+ ;; underscore left as a symbol char, like C mode
+
+ ;; m4 quotes
+ (modify-syntax-entry ?` "('" table)
+ (modify-syntax-entry ?' ")`" table)
+
+ table)
+ "Syntax table used in `gmpasm-mode'.
+
+m4 ignores quote marks in # comments at the top level, but inside quotes #
+isn't special and all quotes are active. There seems no easy way to express
+this in the syntax table, so nothing is done for comments. Usually this is
+best, since it picks up invalid apostrophes in comments inside quotes.")
+
+
+(defvar gmpasm-font-lock-keywords
+ (eval-when-compile
+ (list
+ (cons
+ (concat
+ "\\b"
+ (regexp-opt
+ '("deflit" "defreg" "defframe" "defframe_pushl"
+ "define_not_for_expansion"
+ "ASM_START" "ASM_END" "PROLOGUE" "EPILOGUE"
+ "forloop"
+ "TEXT" "DATA" "ALIGN" "W32"
+ "builtin" "changecom" "changequote" "changeword" "debugfile"
+ "debugmode" "decr" "define" "defn" "divert" "divnum" "dumpdef"
+ "errprint" "esyscmd" "eval" "__file__" "format" "gnu" "ifdef"
+ "ifelse" "include" "incr" "index" "indir" "len" "__line__"
+ "m4exit" "m4wrap" "maketemp" "patsubst" "popdef" "pushdef"
+ "regexp" "shift" "sinclude" "substr" "syscmd" "sysval"
+ "traceoff" "traceon" "translit" "undefine" "undivert" "unix")
+ t)
+ "\\b") 'font-lock-keyword-face)))
+
+ "`font-lock-keywords' for `gmpasm-mode'.
+
+The keywords are m4 builtins and some of the GMP macros used in asm files.
+L and LF don't look good fontified, so they're omitted.
+
+The right assembler comment regexp is added dynamically buffer-local (with
+dnl too).")
+
+
+;; Initialized if gmpasm-mode finds filladapt loaded.
+(defvar gmpasm-filladapt-token-table nil
+ "Filladapt token table used in `gmpasm-mode'.")
+(defvar gmpasm-filladapt-token-match-table nil
+ "Filladapt token match table used in `gmpasm-mode'.")
+(defvar gmpasm-filladapt-token-conversion-table nil
+ "Filladapt token conversion table used in `gmpasm-mode'.")
+
+
+;;;###autoload
+(defun gmpasm-mode ()
+ "A major mode for editing GNU MP asm and m4 files.
+
+\\{gmpasm-mode-map}
+`comment-start' and `comment-end' are set buffer-local to assembler
+commenting appropriate for the CPU by looking for something matching
+`gmpasm-comment-start-regexp' at the start of a line, or \"#\" is used if
+there's no match (if \"#\" isn't what you want, type in a desired comment
+and do \\[gmpasm-mode] to reinitialize).
+
+`adaptive-fill-regexp' is set buffer-local to the standard regexp with
+`comment-start' and dnl added. If filladapt.el has been loaded it similarly
+gets `comment-start' and dnl added as buffer-local fill prefixes.
+
+Font locking has the m4 builtins, some of the GMP macros, m4 dnl commenting,
+and assembler commenting (based on the `comment-start' determined).
+
+Note that `gmpasm-comment-start-regexp' is only matched as a whole word, so
+the `C' in it is only matched as a whole word, not on something that happens
+to start with `C'. Also it's only the particular `comment-start' determined
+that's added for filling etc, not the whole `gmpasm-comment-start-regexp'.
+
+`gmpasm-mode-hook' is run after initializations are complete.
+"
+
+ (interactive)
+ (kill-all-local-variables)
+ (setq major-mode 'gmpasm-mode
+ mode-name "gmpasm")
+ (use-local-map gmpasm-mode-map)
+ (set-syntax-table gmpasm-mode-syntax-table)
+ (setq fill-column 76)
+
+ ;; Short instructions might fit with 32, but anything with labels or
+ ;; expressions soon needs the comments pushed out to column 40.
+ (setq comment-column 40)
+
+ ;; Don't want to find out the hard way which dumb assemblers don't like a
+ ;; missing final newline.
+ (set (make-local-variable 'require-final-newline) t)
+
+ ;; The first match of gmpasm-comment-start-regexp at the start of a line
+ ;; determines comment-start, or "#" if no match.
+ (set (make-local-variable 'comment-start)
+ (save-excursion
+ (goto-char (point-min))
+ (if (re-search-forward
+ (concat "^\\(" gmpasm-comment-start-regexp "\\)\\(\\s-\\|$\\)")
+ nil t)
+ (match-string 1)
+ "#")))
+ (set (make-local-variable 'comment-end) "")
+
+ ;; If comment-start ends in an alphanumeric then \b is used to match it
+ ;; only as a separate word. The test is for an alphanumeric rather than
+ ;; \w since we might try # or ! as \w characters but without wanting \b.
+ (let ((comment-regexp
+ (concat (regexp-quote comment-start)
+ (if (string-match "[a-zA-Z0-9]\\'" comment-start) "\\b"))))
+
+ ;; Whitespace is required before a comment-start so m4 $# doesn't match
+ ;; when comment-start is "#".
+ ;; Only spaces or tabs match after, so newline isn't included in the
+ ;; font lock below.
+ (set (make-local-variable 'comment-start-skip)
+ (concat "\\(^\\|\\s-\\)" comment-regexp "[ \t]*"))
+
+ ;; Comment fontification based on comment-start, matching through to the
+ ;; end of the line.
+ (add-to-list (make-local-variable 'gmpasm-font-lock-keywords)
+ (cons (concat
+ "\\(\\bdnl\\b\\|" comment-start-skip "\\).*$")
+ 'font-lock-comment-face))
+
+ (set (make-local-variable 'font-lock-defaults)
+ '(gmpasm-font-lock-keywords
+ t ; no syntactic fontification (of strings etc)
+ nil ; no case-fold
+ ((?_ . "w")) ; _ part of a word while fontifying
+ ))
+
+ ;; Paragraphs are separated by blank lines, or lines with only dnl or
+ ;; comment-start.
+ (set (make-local-variable 'paragraph-separate)
+ (concat "[ \t\f]*\\(\\(" comment-regexp "\\|dnl\\)[ \t]*\\)*$"))
+ (set (make-local-variable 'paragraph-start)
+ (concat "\f\\|" paragraph-separate))
+
+ ;; Adaptive fill gets dnl and comment-start as comment style prefixes on
+ ;; top of the standard regexp (which has # and ; already actually).
+ (set (make-local-variable 'adaptive-fill-regexp)
+ (concat "[ \t]*\\(\\("
+ comment-regexp
+ "\\|dnl\\|[-|#;>*]+\\|(?[0-9]+[.)]\\)[ \t]*\\)*"))
+ (set (make-local-variable 'adaptive-fill-first-line-regexp)
+ "\\`\\([ \t]*dnl\\)?[ \t]*\\'")
+
+ (when (fboundp 'filladapt-mode)
+ (when (not gmpasm-filladapt-token-table)
+ (setq gmpasm-filladapt-token-table
+ filladapt-token-table)
+ (setq gmpasm-filladapt-token-match-table
+ filladapt-token-match-table)
+ (setq gmpasm-filladapt-token-conversion-table
+ filladapt-token-conversion-table)
+
+ ;; Numbered bullet points like "2.1" get matched at the start of a
+ ;; line when it's really something like "2.1 cycles/limb", so delete
+ ;; this from the list. The regexp for "1.", "2." etc is left
+ ;; though.
+ (gmpasm-delete-from-list 'gmpasm-filladapt-token-table
+ '("[0-9]+\\(\\.[0-9]+\\)+[ \t]"
+ bullet))
+
+ ;; "%" as a comment prefix interferes with x86 register names
+ ;; like %eax, so delete this.
+ (gmpasm-delete-from-list 'gmpasm-filladapt-token-table
+ '("%+" postscript-comment))
+
+ (add-to-list 'gmpasm-filladapt-token-match-table
+ '(gmpasm-comment gmpasm-comment))
+ (add-to-list 'gmpasm-filladapt-token-conversion-table
+ '(gmpasm-comment . exact))
+ )
+
+ (set (make-local-variable 'filladapt-token-table)
+ gmpasm-filladapt-token-table)
+ (set (make-local-variable 'filladapt-token-match-table)
+ gmpasm-filladapt-token-match-table)
+ (set (make-local-variable 'filladapt-token-conversion-table)
+ gmpasm-filladapt-token-conversion-table)
+
+ ;; Add dnl and comment-start as fill prefixes.
+ ;; Comments in filladapt.el say filladapt-token-table must begin
+ ;; with ("^" beginning-of-line), so put our addition second.
+ (gmpasm-add-to-list-second 'filladapt-token-table
+ (list (concat "dnl[ \t]\\|" comment-regexp)
+ 'gmpasm-comment))
+ ))
+
+ (run-hooks 'gmpasm-mode-hook))
+
+
+(defun gmpasm-comment-region-dnl (beg end &optional arg)
+ "(gmpasm-comment-region BEG END &option ARG)
+
+Comment or uncomment each line in the region using `dnl'.
+With \\[universal-argument] prefix arg, uncomment each line in region.
+This is `comment-region', but using \"dnl\"."
+
+ (interactive "r\nP")
+ (let ((comment-start "dnl")
+ (comment-end ""))
+ (comment-region beg end arg)))
+
+
+(provide 'gmpasm-mode)
+
+;;; gmpasm-mode.el ends here
diff --git a/rts/gmp/mpn/m68k/add_n.S b/rts/gmp/mpn/m68k/add_n.S
new file mode 100644
index 0000000000..9e1d89d64f
--- /dev/null
+++ b/rts/gmp/mpn/m68k/add_n.S
@@ -0,0 +1,79 @@
+/* mc68020 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+ sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 16)
+ size (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_add_n)
+
+C_SYMBOL_NAME(__gmpn_add_n:)
+PROLOG(__gmpn_add_n)
+/* Save used registers on the stack. */
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,12),R(a2)
+ movel MEM_DISP(sp,16),R(a0)
+ movel MEM_DISP(sp,20),R(a1)
+ movel MEM_DISP(sp,24),R(d2)
+
+ eorw #1,R(d2)
+ lsrl #1,R(d2)
+ bcc L(L1)
+ subql #1,R(d2) /* clears cy as side effect */
+
+L(Loop:)
+ movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ addxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+L(L1:) movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ addxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+
+ dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
+ subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
+ subl #0x10000,R(d2)
+ bcs L(L2)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(Loop)
+
+L(L2:)
+ negl R(d0)
+
+/* Restore used registers from stack frame. */
+ movel MEM_POSTINC(sp),R(a2)
+ movel MEM_POSTINC(sp),R(d2)
+
+ rts
+EPILOG(__gmpn_add_n)
diff --git a/rts/gmp/mpn/m68k/lshift.S b/rts/gmp/mpn/m68k/lshift.S
new file mode 100644
index 0000000000..a539d5d42e
--- /dev/null
+++ b/rts/gmp/mpn/m68k/lshift.S
@@ -0,0 +1,150 @@
+/* mc68020 __gmpn_lshift -- Shift left a low-level natural-number integer.
+
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ s_size (sp + 16)
+ cnt (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_lshift)
+
+C_SYMBOL_NAME(__gmpn_lshift:)
+PROLOG(__gmpn_lshift)
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Lnormal)
+ cmpl R(s_ptr),R(res_ptr)
+ bls L(Lspecial) /* jump if s_ptr >= res_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(s_ptr,d0,l),R(a2)
+#endif
+ cmpl R(res_ptr),R(a2)
+ bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */
+
+L(Lnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+ movel MEM_PREDEC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsrl R(d5),R(d0) /* compute carry limb */
+
+ lsll R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Lend)
+ lsrl #1,R(s_size)
+ bcs L(L1)
+ subql #1,R(s_size)
+
+L(Loop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsrl R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_PREDEC(res_ptr)
+ lsll R(cnt),R(d2)
+L(L1:)
+ movel MEM_PREDEC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsrl R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+ lsll R(cnt),R(d1)
+
+ dbf R(s_size),L(Loop)
+ subl #0x10000,R(s_size)
+ bcc L(Loop)
+
+L(Lend:)
+ movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from least significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LL1)
+ subql #1,R(s_size)
+
+L(LLoop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ addxl R(d2),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+L(LL1:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ addxl R(d2),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+
+ dbf R(s_size),L(LLoop)
+ addxl R(d0),R(d0) /* save cy in lsb */
+ subl #0x10000,R(s_size)
+ bcs L(LLend)
+ lsrl #1,R(d0) /* restore cy */
+ bra L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(__gmpn_lshift)
diff --git a/rts/gmp/mpn/m68k/mc68020/addmul_1.S b/rts/gmp/mpn/m68k/mc68020/addmul_1.S
new file mode 100644
index 0000000000..6638115d71
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/addmul_1.S
@@ -0,0 +1,83 @@
+/* mc68020 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+ the result to a second limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s1_size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_addmul_1)
+
+C_SYMBOL_NAME(__gmpn_addmul_1:)
+PROLOG(__gmpn_addmul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d5),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,20),R(res_ptr)
+ movel MEM_DISP(sp,24),R(s1_ptr)
+ movel MEM_DISP(sp,28),R(s1_size)
+ movel MEM_DISP(sp,32),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ clrl R(d5)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ addxl R(d5),R(d1)
+ addl R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ addxl R(d5),R(d0)
+ addl R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ addxl R(d5),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d5)
+
+ rts
+EPILOG(__gmpn_addmul_1)
diff --git a/rts/gmp/mpn/m68k/mc68020/mul_1.S b/rts/gmp/mpn/m68k/mc68020/mul_1.S
new file mode 100644
index 0000000000..fdd4c39d70
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/mul_1.S
@@ -0,0 +1,90 @@
+/* mc68020 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+ the result in a second limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s1_size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_mul_1)
+
+C_SYMBOL_NAME(__gmpn_mul_1:)
+PROLOG(__gmpn_mul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d4),MEM_PREDEC(sp)
+#if 0
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(d3),MEM_PREDEC(sp)
+ movel R(d4),MEM_PREDEC(sp)
+#endif
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,16),R(res_ptr)
+ movel MEM_DISP(sp,20),R(s1_ptr)
+ movel MEM_DISP(sp,24),R(s1_size)
+ movel MEM_DISP(sp,28),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ movel R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ movel R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ clrl R(d3)
+ addxl R(d3),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d4)
+#if 0
+ movel MEM_POSTINC(sp),R(d4)
+ movel MEM_POSTINC(sp),R(d3)
+ movel MEM_POSTINC(sp),R(d2)
+#endif
+ rts
+EPILOG(__gmpn_mul_1)
diff --git a/rts/gmp/mpn/m68k/mc68020/submul_1.S b/rts/gmp/mpn/m68k/mc68020/submul_1.S
new file mode 100644
index 0000000000..3c36b70166
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/submul_1.S
@@ -0,0 +1,83 @@
+/* mc68020 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+ the result from a second limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s1_size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_submul_1)
+
+C_SYMBOL_NAME(__gmpn_submul_1:)
+PROLOG(__gmpn_submul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d5),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,20),R(res_ptr)
+ movel MEM_DISP(sp,24),R(s1_ptr)
+ movel MEM_DISP(sp,28),R(s1_size)
+ movel MEM_DISP(sp,32),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ clrl R(d5)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ addxl R(d5),R(d1)
+ subl R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ addxl R(d5),R(d0)
+ subl R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ addxl R(d5),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d5)
+
+ rts
+EPILOG(__gmpn_submul_1)
diff --git a/rts/gmp/mpn/m68k/mc68020/udiv.S b/rts/gmp/mpn/m68k/mc68020/udiv.S
new file mode 100644
index 0000000000..d00cf13558
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/udiv.S
@@ -0,0 +1,31 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+.text
+ .even
+.globl ___udiv_qrnnd
+___udiv_qrnnd:
+ movel sp@(4),a0
+ movel sp@(8),d1
+ movel sp@(12),d0
+ divul sp@(16),d1:d0
+ movel d1,a0@
+ rts
diff --git a/rts/gmp/mpn/m68k/mc68020/umul.S b/rts/gmp/mpn/m68k/mc68020/umul.S
new file mode 100644
index 0000000000..a34ae6c543
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/umul.S
@@ -0,0 +1,31 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+.text
+ .even
+.globl ___umul_ppmm
+___umul_ppmm:
+ movel sp@(4),a0
+ movel sp@(8),d1
+ movel sp@(12),d0
+ mulul d0,d0:d1
+ movel d1,a0@
+ rts
diff --git a/rts/gmp/mpn/m68k/rshift.S b/rts/gmp/mpn/m68k/rshift.S
new file mode 100644
index 0000000000..b47a48e52a
--- /dev/null
+++ b/rts/gmp/mpn/m68k/rshift.S
@@ -0,0 +1,149 @@
+/* mc68020 __gmpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ s_size (sp + 16)
+ cnt (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_rshift)
+
+C_SYMBOL_NAME(__gmpn_rshift:)
+PROLOG(__gmpn_rshift)
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Lnormal)
+ cmpl R(res_ptr),R(s_ptr)
+ bls L(Lspecial) /* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+ cmpl R(s_ptr),R(a2)
+ bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */
+
+L(Lnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsll R(d5),R(d0) /* compute carry limb */
+
+ lsrl R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Lend)
+ lsrl #1,R(s_size)
+ bcs L(L1)
+ subql #1,R(s_size)
+
+L(Loop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d2)
+L(L1:)
+ movel MEM_POSTINC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d1)
+
+ dbf R(s_size),L(Loop)
+ subl #0x10000,R(s_size)
+ bcc L(Loop)
+
+L(Lend:)
+ movel R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from most significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LL1)
+ subql #1,R(s_size)
+
+L(LLoop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+L(LL1:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+
+ dbf R(s_size),L(LLoop)
+ roxrl #1,R(d0) /* save cy in msb */
+ subl #0x10000,R(s_size)
+ bcs L(LLend)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(__gmpn_rshift)
diff --git a/rts/gmp/mpn/m68k/sub_n.S b/rts/gmp/mpn/m68k/sub_n.S
new file mode 100644
index 0000000000..ce45b24db5
--- /dev/null
+++ b/rts/gmp/mpn/m68k/sub_n.S
@@ -0,0 +1,79 @@
+/* mc68020 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 16)
+ size (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_sub_n)
+
+C_SYMBOL_NAME(__gmpn_sub_n:)
+PROLOG(__gmpn_sub_n)
+/* Save used registers on the stack. */
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,12),R(a2)
+ movel MEM_DISP(sp,16),R(a0)
+ movel MEM_DISP(sp,20),R(a1)
+ movel MEM_DISP(sp,24),R(d2)
+
+ eorw #1,R(d2)
+ lsrl #1,R(d2)
+ bcc L(L1)
+ subql #1,R(d2) /* clears cy as side effect */
+
+L(Loop:)
+ movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ subxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+L(L1:) movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ subxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+
+ dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
+ subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
+ subl #0x10000,R(d2)
+ bcs L(L2)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(Loop)
+
+L(L2:)
+ negl R(d0)
+
+/* Restore used registers from stack frame. */
+ movel MEM_POSTINC(sp),R(a2)
+ movel MEM_POSTINC(sp),R(d2)
+
+ rts
+EPILOG(__gmpn_sub_n)
diff --git a/rts/gmp/mpn/m68k/syntax.h b/rts/gmp/mpn/m68k/syntax.h
new file mode 100644
index 0000000000..9eec279c06
--- /dev/null
+++ b/rts/gmp/mpn/m68k/syntax.h
@@ -0,0 +1,177 @@
+/* asm.h -- Definitions for 68k syntax variations.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#undef ALIGN
+
+#ifdef MIT_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)base@
+#define MEM_DISP(base,displacement)base@(displacement)
+#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale)
+#define MEM_PREDEC(memory_base)memory_base@-
+#define MEM_POSTINC(memory_base)memory_base@+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#define moveql moveq
+/* Use variable sized opcodes. */
+#define bcc jcc
+#define bcs jcs
+#define bls jls
+#define beq jeq
+#define bne jne
+#define bra jra
+#endif
+
+#ifdef SONY_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#endif
+
+#ifdef MOTOROLA_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT
+#define ALIGN
+#define GLOBL XDEF
+#define lea LEA
+#define movel MOVE.L
+#define moveml MOVEM.L
+#define moveql MOVEQ.L
+#define cmpl CMP.L
+#define orl OR.L
+#define clrl CLR.L
+#define eorw EOR.W
+#define lsrl LSR.L
+#define lsll LSL.L
+#define roxrl ROXR.L
+#define roxll ROXL.L
+#define addl ADD.L
+#define addxl ADDX.L
+#define addql ADDQ.L
+#define subl SUB.L
+#define subxl SUBX.L
+#define subql SUBQ.L
+#define negl NEG.L
+#define mulul MULU.L
+#define bcc BCC
+#define bcs BCS
+#define bls BLS
+#define beq BEQ
+#define bne BNE
+#define bra BRA
+#define dbf DBF
+#define rts RTS
+#define d0 D0
+#define d1 D1
+#define d2 D2
+#define d3 D3
+#define d4 D4
+#define d5 D5
+#define d6 D6
+#define d7 D7
+#define a0 A0
+#define a1 A1
+#define a2 A2
+#define a3 A3
+#define a4 A4
+#define a5 A5
+#define a6 A6
+#define a7 A7
+#define sp SP
+#endif
+
+#ifdef ELF_SYNTAX
+#define PROLOG(name) .type name,@function
+#define EPILOG(name) .size name,.-name
+#define MEM(base)(R(base))
+#define MEM_DISP(base,displacement)(displacement,R(base))
+#define MEM_PREDEC(memory_base)-(R(memory_base))
+#define MEM_POSTINC(memory_base)(R(memory_base))+
+#ifdef __STDC__
+#define R_(r)%##r
+#define R(r)R_(r)
+#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix))
+#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix)
+#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale))
+#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale)
+#define L(label) .##label
+#else
+#define R(r)%/**/r
+#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale)
+#define L(label) ./**/label
+#endif
+#define TEXT .text
+#define ALIGN .align 2
+#define GLOBL .globl
+#define bcc jbcc
+#define bcs jbcs
+#define bls jbls
+#define beq jbeq
+#define bne jbne
+#define bra jbra
+#endif
+
+#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX)
+#define movel move.l
+#define moveml movem.l
+#define moveql moveq.l
+#define cmpl cmp.l
+#define orl or.l
+#define clrl clr.l
+#define eorw eor.w
+#define lsrl lsr.l
+#define lsll lsl.l
+#define roxrl roxr.l
+#define roxll roxl.l
+#define addl add.l
+#define addxl addx.l
+#define addql addq.l
+#define subl sub.l
+#define subxl subx.l
+#define subql subq.l
+#define negl neg.l
+#define mulul mulu.l
+#endif
diff --git a/rts/gmp/mpn/m88k/add_n.s b/rts/gmp/mpn/m88k/add_n.s
new file mode 100644
index 0000000000..0b776c618a
--- /dev/null
+++ b/rts/gmp/mpn/m88k/add_n.s
@@ -0,0 +1,104 @@
+; mc88100 __gmpn_add -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; s2_ptr r4
+; size r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention. As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+ text
+ align 16
+ global ___gmpn_add_n
+___gmpn_add_n:
+ ld r6,r3,0 ; read first limb from s1_ptr
+ extu r10,r5,3
+ ld r7,r4,0 ; read first limb from s2_ptr
+
+ subu.co r5,r0,r5 ; (clear carry as side effect)
+ mak r5,r5,3<4>
+ bcnd eq0,r5,Lzero
+
+ or r12,r0,lo16(Lbase)
+ or.u r12,r12,hi16(Lbase)
+ addu r12,r12,r5 ; r12 is address for entering in loop
+
+ extu r5,r5,2 ; divide by 4
+ subu r2,r2,r5 ; adjust res_ptr
+ subu r3,r3,r5 ; adjust s1_ptr
+ subu r4,r4,r5 ; adjust s2_ptr
+
+ or r8,r6,r0
+
+ jmp.n r12
+ or r9,r7,r0
+
+Loop: addu r3,r3,32
+ st r8,r2,28
+ addu r4,r4,32
+ ld r6,r3,0
+ addu r2,r2,32
+ ld r7,r4,0
+Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt)
+Lbase: ld r8,r3,4
+ addu.cio r6,r6,r7
+ ld r9,r4,4
+ st r6,r2,0
+ ld r6,r3,8 ; add 7 + 8r limbs
+ addu.cio r8,r8,r9
+ ld r7,r4,8
+ st r8,r2,4
+ ld r8,r3,12 ; add 6 + 8r limbs
+ addu.cio r6,r6,r7
+ ld r9,r4,12
+ st r6,r2,8
+ ld r6,r3,16 ; add 5 + 8r limbs
+ addu.cio r8,r8,r9
+ ld r7,r4,16
+ st r8,r2,12
+ ld r8,r3,20 ; add 4 + 8r limbs
+ addu.cio r6,r6,r7
+ ld r9,r4,20
+ st r6,r2,16
+ ld r6,r3,24 ; add 3 + 8r limbs
+ addu.cio r8,r8,r9
+ ld r7,r4,24
+ st r8,r2,20
+ ld r8,r3,28 ; add 2 + 8r limbs
+ addu.cio r6,r6,r7
+ ld r9,r4,28
+ st r6,r2,24
+ bcnd.n ne0,r10,Loop ; add 1 + 8r limbs
+ addu.cio r8,r8,r9
+
+ st r8,r2,28 ; store most significant limb
+
+ jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/m88k/mc88110/add_n.S b/rts/gmp/mpn/m88k/mc88110/add_n.S
new file mode 100644
index 0000000000..843a50dded
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/add_n.S
@@ -0,0 +1,200 @@
+; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size r5
+
+#include "sysdep.h"
+
+ text
+ align 16
+ global C_SYMBOL_NAME(__gmpn_add_n)
+C_SYMBOL_NAME(__gmpn_add_n):
+ addu.co r0,r0,r0 ; clear cy flag
+ xor r12,s2_ptr,res_ptr
+ bb1 2,r12,L1
+; ** V1a **
+L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned?
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ addu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s1_ptr,0
+ ld r12,s1_ptr,4
+ ld.d r8,s2_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1: subu size,size,8
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,16
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,20
+ ld.d r8,s2_ptr,16
+ st.d r6,res_ptr,8
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,24
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,28
+ ld.d r8,s2_ptr,24
+ st.d r6,res_ptr,16
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,32
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,36
+ addu s1_ptr,s1_ptr,32
+ ld.d r8,s2_ptr,32
+ addu s2_ptr,s2_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1
+
+Lfin1: addu size,size,8-2
+ bcnd lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: addu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1
+Lend1: addu.cio r6,r10,r8
+ addu.cio r7,r12,r9
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1
+/* Add last limb */
+ ld r10,s1_ptr,8
+ ld r8,s2_ptr,8
+ addu.cio r6,r10,r8
+ st r6,res_ptr,8
+
+Lret1: jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+
+L1: xor r12,s1_ptr,res_ptr
+ bb1 2,r12,L2
+; ** V1b **
+ or r12,r0,s2_ptr
+ or s2_ptr,r0,s1_ptr
+ or s1_ptr,r0,r12
+ br L0
+
+; ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp r12,size,1
+ bb1 eq,r12,Ljone
+ bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ addu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+
+L_v2: subu size,size,8
+ bcnd lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop2: subu size,size,8
+ ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ addu.cio r8,r8,r6
+ st r8,res_ptr,0
+ addu.cio r9,r9,r7
+ st r9,res_ptr,4
+ ld.d r8,s1_ptr,8
+ ld.d r6,s2_ptr,8
+ addu.cio r8,r8,r6
+ st r8,res_ptr,8
+ addu.cio r9,r9,r7
+ st r9,res_ptr,12
+ ld.d r8,s1_ptr,16
+ ld.d r6,s2_ptr,16
+ addu.cio r8,r8,r6
+ st r8,res_ptr,16
+ addu.cio r9,r9,r7
+ st r9,res_ptr,20
+ ld.d r8,s1_ptr,24
+ ld.d r6,s2_ptr,24
+ addu.cio r8,r8,r6
+ st r8,res_ptr,24
+ addu.cio r9,r9,r7
+ st r9,res_ptr,28
+ addu s1_ptr,s1_ptr,32
+ addu s2_ptr,s2_ptr,32
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop2
+
+Lfin2: addu size,size,8-2
+ bcnd lt0,size,Lend2
+Loope2: ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ addu.cio r8,r8,r6
+ st r8,res_ptr,0
+ addu.cio r9,r9,r7
+ st r9,res_ptr,4
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope2
+Lend2: bb0 0,size,Lret2
+/* Add last limb */
+Ljone: ld r10,s1_ptr,0
+ ld r8,s2_ptr,0
+ addu.cio r6,r10,r8
+ st r6,res_ptr,0
+
+Lret2: jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/m88k/mc88110/addmul_1.s b/rts/gmp/mpn/m88k/mc88110/addmul_1.s
new file mode 100644
index 0000000000..7d97c87c79
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/addmul_1.s
@@ -0,0 +1,61 @@
+; mc88110 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+ text
+ align 16
+ global ___gmpn_addmul_1
+___gmpn_addmul_1:
+ lda r3,r3[r4]
+ lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
+ subu r4,r0,r4
+ addu.co r2,r0,r0 ; r2 = cy = 0
+
+ ld r6,r3[r4]
+ addu r4,r4,1
+ subu r8,r8,4
+ bcnd.n eq0,r4,Lend
+ mulu.d r10,r6,r5
+
+Loop: ld r7,r8[r4]
+ ld r6,r3[r4]
+ addu.cio r9,r11,r2
+ addu.ci r2,r10,r0
+ addu.co r9,r9,r7
+ st r9,r8[r4]
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd ne0,r4,Loop
+
+Lend: ld r7,r8,0
+ addu.cio r9,r11,r2
+ addu.ci r2,r10,r0
+ addu.co r9,r9,r7
+ st r9,r8,0
+ jmp.n r1
+ addu.ci r2,r2,r0
diff --git a/rts/gmp/mpn/m88k/mc88110/mul_1.s b/rts/gmp/mpn/m88k/mc88110/mul_1.s
new file mode 100644
index 0000000000..b8483afa91
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/mul_1.s
@@ -0,0 +1,59 @@
+; mc88110 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+ text
+ align 16
+ global ___gmpn_mul_1
+___gmpn_mul_1:
+ ; Make S1_PTR and RES_PTR point at the end of their blocks
+ ; and negate SIZE.
+ lda r3,r3[r4]
+ lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
+ subu r4,r0,r4
+
+ addu.co r2,r0,r0 ; r2 = cy = 0
+
+ ld r6,r3[r4]
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd.n eq0,r4,Lend
+ subu r8,r8,8
+
+Loop: ld r6,r3[r4]
+ addu.cio r9,r11,r2
+ or r2,r10,r0 ; could be avoided if unrolled
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd.n ne0,r4,Loop
+ st r9,r8[r4]
+
+Lend: addu.cio r9,r11,r2
+ st r9,r8,4
+ jmp.n r1
+ addu.ci r2,r10,r0
diff --git a/rts/gmp/mpn/m88k/mc88110/sub_n.S b/rts/gmp/mpn/m88k/mc88110/sub_n.S
new file mode 100644
index 0000000000..715a3faf25
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/sub_n.S
@@ -0,0 +1,276 @@
+; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size r5
+
+#include "sysdep.h"
+
+ text
+ align 16
+ global C_SYMBOL_NAME(__gmpn_sub_n)
+C_SYMBOL_NAME(__gmpn_sub_n):
+ subu.co r0,r0,r0 ; set cy flag
+ xor r12,s2_ptr,res_ptr
+ bb1 2,r12,L1
+; ** V1a **
+L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ subu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s1_ptr,0
+ ld r12,s1_ptr,4
+ ld.d r8,s2_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1: subu size,size,8
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,16
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,20
+ ld.d r8,s2_ptr,16
+ st.d r6,res_ptr,8
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,24
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,28
+ ld.d r8,s2_ptr,24
+ st.d r6,res_ptr,16
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,32
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,36
+ addu s1_ptr,s1_ptr,32
+ ld.d r8,s2_ptr,32
+ addu s2_ptr,s2_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1
+
+Lfin1: addu size,size,8-2
+ bcnd lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: subu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1
+Lend1: subu.cio r6,r10,r8
+ subu.cio r7,r12,r9
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1
+/* Add last limb */
+ ld r10,s1_ptr,8
+ ld r8,s2_ptr,8
+ subu.cio r6,r10,r8
+ st r6,res_ptr,8
+
+Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
+
+L1: xor r12,s1_ptr,res_ptr
+ bb1 2,r12,L2
+; ** V1b **
+ bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s1_ptr */
+ ld r10,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ ld r8,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ subu size,size,1
+ subu.co r6,r8,r10
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1b: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s2_ptr,0
+ ld r12,s2_ptr,4
+ ld.d r8,s1_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1b
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1b: subu size,size,8
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,8
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,12
+ ld.d r8,s1_ptr,8
+ st.d r6,res_ptr,0
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,16
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,20
+ ld.d r8,s1_ptr,16
+ st.d r6,res_ptr,8
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,24
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,28
+ ld.d r8,s1_ptr,24
+ st.d r6,res_ptr,16
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,32
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,36
+ addu s2_ptr,s2_ptr,32
+ ld.d r8,s1_ptr,32
+ addu s1_ptr,s1_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1b
+
+Lfin1b: addu size,size,8-2
+ bcnd lt0,size,Lend1b
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subu.cio r6,r8,r10
+ ld r10,s2_ptr,8
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,12
+ ld.d r8,s1_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1b
+Lend1b: subu.cio r6,r8,r10
+ subu.cio r7,r9,r12
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1b
+/* Add last limb */
+ ld r10,s2_ptr,8
+ ld r8,s1_ptr,8
+ subu.cio r6,r8,r10
+ st r6,res_ptr,8
+
+Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
+
+; ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp r12,size,1
+ bb1 eq,r12,Ljone
+ bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ subu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+
+L_v2: subu size,size,8
+ bcnd lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop2: subu size,size,8
+ ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ subu.cio r8,r8,r6
+ st r8,res_ptr,0
+ subu.cio r9,r9,r7
+ st r9,res_ptr,4
+ ld.d r8,s1_ptr,8
+ ld.d r6,s2_ptr,8
+ subu.cio r8,r8,r6
+ st r8,res_ptr,8
+ subu.cio r9,r9,r7
+ st r9,res_ptr,12
+ ld.d r8,s1_ptr,16
+ ld.d r6,s2_ptr,16
+ subu.cio r8,r8,r6
+ st r8,res_ptr,16
+ subu.cio r9,r9,r7
+ st r9,res_ptr,20
+ ld.d r8,s1_ptr,24
+ ld.d r6,s2_ptr,24
+ subu.cio r8,r8,r6
+ st r8,res_ptr,24
+ subu.cio r9,r9,r7
+ st r9,res_ptr,28
+ addu s1_ptr,s1_ptr,32
+ addu s2_ptr,s2_ptr,32
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop2
+
+Lfin2: addu size,size,8-2
+ bcnd lt0,size,Lend2
+Loope2: ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ subu.cio r8,r8,r6
+ st r8,res_ptr,0
+ subu.cio r9,r9,r7
+ st r9,res_ptr,4
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope2
+Lend2: bb0 0,size,Lret2
+/* Add last limb */
+Ljone: ld r10,s1_ptr,0
+ ld r8,s2_ptr,0
+ subu.cio r6,r10,r8
+ st r6,res_ptr,0
+
+Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
diff --git a/rts/gmp/mpn/m88k/mul_1.s b/rts/gmp/mpn/m88k/mul_1.s
new file mode 100644
index 0000000000..06370837ef
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mul_1.s
@@ -0,0 +1,127 @@
+; mc88100 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+; Common overhead is about 11 cycles/invocation.
+
+; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The
+; pipeline stalls 2 cycles due to WB contention.)
+
+; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The
+; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
+
+; To enhance speed:
+; 1. Unroll main loop 4-8 times.
+; 2. Schedule code to avoid WB contention. It might be tempting to move the
+; ld instruction in the loops down to save 2 cycles (less WB contention),
+; but that looses because the ultimate value will be read from outside
+; the allocated space. But if we handle the ultimate multiplication in
+; the tail, we can do this.
+; 3. Make the multiplication with less instructions. I think the code for
+; (S2_LIMB >= 0x10000) is not minimal.
+; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
+; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
+; cycles/limb. (Assuming infinite unrolling.)
+
+ text
+ align 16
+ global ___gmpn_mul_1
+___gmpn_mul_1:
+
+ ; Make S1_PTR and RES_PTR point at the end of their blocks
+ ; and negate SIZE.
+ lda r3,r3[r4]
+ lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
+ subu r4,r0,r4
+
+ addu.co r2,r0,r0 ; r2 = cy = 0
+ ld r9,r3[r4]
+ mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
+ extu r8,r5,16 ; r8 = hi(S2_LIMB)
+ bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
+ subu r6,r6,4
+
+; General code for any value of S2_LIMB.
+
+ ; Make a stack frame and save r25 and r26
+ subu r31,r31,16
+ st.d r25,r31,8
+
+ ; Enter the loop in the middle
+ br.n L1
+ addu r4,r4,1
+
+Loop: ld r9,r3[r4]
+ st r26,r6[r4]
+; bcnd ne0,r0,0 ; bubble
+ addu r4,r4,1
+L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
+ mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
+ mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
+ mul r10,r12,r8 ; r10 = prod_1a mul_3
+ extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
+ mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
+ mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
+ extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
+ addu r10,r10,r11 ; addu_1 WB extu_2
+; bcnd ne0,r0,0 ; bubble WB addu_1
+ addu.co r10,r10,r12 ; WB mul_4
+ mask.u r10,r10,0xffff ; move the 16 most significant bits...
+ addu.ci r10,r10,r0 ; ...to the low half of the word...
+ rot r10,r10,16 ; ...and put carry in pos 16.
+ addu.co r26,r26,r2 ; add old carry limb
+ bcnd.n ne0,r4,Loop
+ addu.ci r2,r25,r10 ; compute new carry limb
+
+ st r26,r6[r4]
+ ld.d r25,r31,8
+ jmp.n r1
+ addu r31,r31,16
+
+; Fast code for S2_LIMB < 0x10000
+Lsmall:
+ ; Enter the loop in the middle
+ br.n SL1
+ addu r4,r4,1
+
+SLoop: ld r9,r3[r4] ;
+ st r8,r6[r4] ;
+ addu r4,r4,1 ;
+SL1: mul r8,r9,r5 ; low word of product
+ mask r12,r9,0xffff ; r12 = lo(s1_limb)
+ extu r13,r9,16 ; r13 = hi(s1_limb)
+ mul r11,r12,r7 ; r11 = prod_0
+ mul r12,r13,r7 ; r12 = prod_1b
+ addu.cio r8,r8,r2 ; add old carry limb
+ extu r10,r11,16 ; r11 = hi(prod_0)
+ addu r10,r10,r12 ;
+ bcnd.n ne0,r4,SLoop
+ extu r2,r10,16 ; r2 = new carry limb
+
+ jmp.n r1
+ st r8,r6[r4]
diff --git a/rts/gmp/mpn/m88k/sub_n.s b/rts/gmp/mpn/m88k/sub_n.s
new file mode 100644
index 0000000000..2fd345a135
--- /dev/null
+++ b/rts/gmp/mpn/m88k/sub_n.s
@@ -0,0 +1,106 @@
+; mc88100 __gmpn_sub -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; s2_ptr r4
+; size r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention. As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+ text
+ align 16
+ global ___gmpn_sub_n
+___gmpn_sub_n:
+ ld r6,r3,0 ; read first limb from s1_ptr
+ extu r10,r5,3
+ ld r7,r4,0 ; read first limb from s2_ptr
+
+ subu r5,r0,r5
+ mak r5,r5,3<4>
+ bcnd.n eq0,r5,Lzero
+ subu.co r0,r0,r0 ; initialize carry
+
+ or r12,r0,lo16(Lbase)
+ or.u r12,r12,hi16(Lbase)
+ addu r12,r12,r5 ; r12 is address for entering in loop
+
+ extu r5,r5,2 ; divide by 4
+ subu r2,r2,r5 ; adjust res_ptr
+ subu r3,r3,r5 ; adjust s1_ptr
+ subu r4,r4,r5 ; adjust s2_ptr
+
+ or r8,r6,r0
+
+ jmp.n r12
+ or r9,r7,r0
+
+Loop: addu r3,r3,32
+ st r8,r2,28
+ addu r4,r4,32
+ ld r6,r3,0
+ addu r2,r2,32
+ ld r7,r4,0
+Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt)
+Lbase: ld r8,r3,4
+ subu.cio r6,r6,r7
+ ld r9,r4,4
+ st r6,r2,0
+ ld r6,r3,8 ; subtract 7 + 8r limbs
+ subu.cio r8,r8,r9
+ ld r7,r4,8
+ st r8,r2,4
+ ld r8,r3,12 ; subtract 6 + 8r limbs
+ subu.cio r6,r6,r7
+ ld r9,r4,12
+ st r6,r2,8
+ ld r6,r3,16 ; subtract 5 + 8r limbs
+ subu.cio r8,r8,r9
+ ld r7,r4,16
+ st r8,r2,12
+ ld r8,r3,20 ; subtract 4 + 8r limbs
+ subu.cio r6,r6,r7
+ ld r9,r4,20
+ st r6,r2,16
+ ld r6,r3,24 ; subtract 3 + 8r limbs
+ subu.cio r8,r8,r9
+ ld r7,r4,24
+ st r8,r2,20
+ ld r8,r3,28 ; subtract 2 + 8r limbs
+ subu.cio r6,r6,r7
+ ld r9,r4,28
+ st r6,r2,24
+ bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs
+ subu.cio r8,r8,r9
+
+ st r8,r2,28 ; store most significant limb
+
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
diff --git a/rts/gmp/mpn/mips2/add_n.s b/rts/gmp/mpn/mips2/add_n.s
new file mode 100644
index 0000000000..5c3c7fc8a1
--- /dev/null
+++ b/rts/gmp/mpn/mips2/add_n.s
@@ -0,0 +1,120 @@
+ # MIPS2 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_add_n
+ .ent __gmpn_add_n
+__gmpn_add_n:
+ .set noreorder
+ .set nomacro
+
+ lw $10,0($5)
+ lw $11,0($6)
+
+ addiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ subu $7,$7,$9
+
+.Loop0: addiu $9,$9,-1
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,0($4)
+ or $2,$2,$8
+
+ addiu $5,$5,4
+ addiu $6,$6,4
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ addiu $4,$4,4
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: addiu $7,$7,-4
+
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,0($4)
+ or $2,$2,$8
+
+ lw $10,8($5)
+ addu $13,$13,$2
+ lw $11,8($6)
+ sltu $8,$13,$2
+ addu $13,$12,$13
+ sltu $2,$13,$12
+ sw $13,4($4)
+ or $2,$2,$8
+
+ lw $12,12($5)
+ addu $11,$11,$2
+ lw $13,12($6)
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,8($4)
+ or $2,$2,$8
+
+ lw $10,16($5)
+ addu $13,$13,$2
+ lw $11,16($6)
+ sltu $8,$13,$2
+ addu $13,$12,$13
+ sltu $2,$13,$12
+ sw $13,12($4)
+ or $2,$2,$8
+
+ addiu $5,$5,16
+ addiu $6,$6,16
+
+ bne $7,$0,.Loop
+ addiu $4,$4,16
+
+.Lend: addu $11,$11,$2
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_add_n
diff --git a/rts/gmp/mpn/mips2/addmul_1.s b/rts/gmp/mpn/mips2/addmul_1.s
new file mode 100644
index 0000000000..1e5037751b
--- /dev/null
+++ b/rts/gmp/mpn/mips2/addmul_1.s
@@ -0,0 +1,97 @@
+ # MIPS __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+ # add the product to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_addmul_1
+ .ent __gmpn_addmul_1
+__gmpn_addmul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ lw $8,0($5)
+
+ # warm up phase 1
+ addiu $5,$5,4
+ multu $8,$7
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC1
+ lw $8,0($5) # load new s1 limb as early as possible
+
+Loop: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addiu $5,$5,4
+ addu $3,$3,$2 # add old carry limb to low product limb
+ multu $8,$7
+ lw $8,0($5) # load new s1 limb as early as possible
+ addiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ addu $3,$10,$3
+ sltu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ bne $6,$0,Loop
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ multu $8,$7
+ addu $3,$10,$3
+ sltu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ addu $3,$10,$3
+ sltu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ j $31
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_addmul_1
diff --git a/rts/gmp/mpn/mips2/lshift.s b/rts/gmp/mpn/mips2/lshift.s
new file mode 100644
index 0000000000..2ca3a3c800
--- /dev/null
+++ b/rts/gmp/mpn/mips2/lshift.s
@@ -0,0 +1,95 @@
+ # MIPS2 __gmpn_lshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_lshift
+ .ent __gmpn_lshift
+__gmpn_lshift:
+ .set noreorder
+ .set nomacro
+
+ sll $2,$6,2
+ addu $5,$5,$2 # make r5 point at end of src
+ lw $10,-4($5) # load first limb
+ subu $13,$0,$7
+ addu $4,$4,$2 # make r4 point at end of res
+ addiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ srl $2,$10,$13 # compute function result
+
+ subu $6,$6,$9
+
+.Loop0: lw $3,-8($5)
+ addiu $4,$4,-4
+ addiu $5,$5,-4
+ addiu $9,$9,-1
+ sll $11,$10,$7
+ srl $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sw $8,0($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: lw $3,-8($5)
+ addiu $4,$4,-16
+ addiu $6,$6,-4
+ sll $11,$10,$7
+ srl $12,$3,$13
+
+ lw $10,-12($5)
+ sll $14,$3,$7
+ or $8,$11,$12
+ sw $8,12($4)
+ srl $9,$10,$13
+
+ lw $3,-16($5)
+ sll $11,$10,$7
+ or $8,$14,$9
+ sw $8,8($4)
+ srl $12,$3,$13
+
+ lw $10,-20($5)
+ sll $14,$3,$7
+ or $8,$11,$12
+ sw $8,4($4)
+ srl $9,$10,$13
+
+ addiu $5,$5,-16
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sw $8,0($4)
+
+.Lend: sll $8,$10,$7
+ j $31
+ sw $8,-4($4)
+ .end __gmpn_lshift
diff --git a/rts/gmp/mpn/mips2/mul_1.s b/rts/gmp/mpn/mips2/mul_1.s
new file mode 100644
index 0000000000..ea8aa26809
--- /dev/null
+++ b/rts/gmp/mpn/mips2/mul_1.s
@@ -0,0 +1,85 @@
+ # MIPS __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+ # store the product in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_mul_1
+ .ent __gmpn_mul_1
+__gmpn_mul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ lw $8,0($5)
+
+ # warm up phase 1
+ addiu $5,$5,4
+ multu $8,$7
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC1
+ lw $8,0($5) # load new s1 limb as early as possible
+
+Loop: mflo $10
+ mfhi $9
+ addiu $5,$5,4
+ addu $10,$10,$2 # add old carry limb to low product limb
+ multu $8,$7
+ lw $8,0($5) # load new s1 limb as early as possible
+ addiu $6,$6,-1 # decrement loop counter
+ sltu $2,$10,$2 # carry from previous addition -> $2
+ sw $10,0($4)
+ addiu $4,$4,4
+ bne $6,$0,Loop
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: mflo $10
+ mfhi $9
+ addu $10,$10,$2
+ sltu $2,$10,$2
+ multu $8,$7
+ sw $10,0($4)
+ addiu $4,$4,4
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: mflo $10
+ mfhi $9
+ addu $10,$10,$2
+ sltu $2,$10,$2
+ sw $10,0($4)
+ j $31
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_mul_1
diff --git a/rts/gmp/mpn/mips2/rshift.s b/rts/gmp/mpn/mips2/rshift.s
new file mode 100644
index 0000000000..37c8f39cb4
--- /dev/null
+++ b/rts/gmp/mpn/mips2/rshift.s
@@ -0,0 +1,92 @@
+ # MIPS2 __gmpn_rshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_rshift
+ .ent __gmpn_rshift
+__gmpn_rshift:
+ .set noreorder
+ .set nomacro
+
+ lw $10,0($5) # load first limb
+ subu $13,$0,$7
+ addiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ sll $2,$10,$13 # compute function result
+
+ subu $6,$6,$9
+
+.Loop0: lw $3,4($5)
+ addiu $4,$4,4
+ addiu $5,$5,4
+ addiu $9,$9,-1
+ srl $11,$10,$7
+ sll $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sw $8,-4($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: lw $3,4($5)
+ addiu $4,$4,16
+ addiu $6,$6,-4
+ srl $11,$10,$7
+ sll $12,$3,$13
+
+ lw $10,8($5)
+ srl $14,$3,$7
+ or $8,$11,$12
+ sw $8,-16($4)
+ sll $9,$10,$13
+
+ lw $3,12($5)
+ srl $11,$10,$7
+ or $8,$14,$9
+ sw $8,-12($4)
+ sll $12,$3,$13
+
+ lw $10,16($5)
+ srl $14,$3,$7
+ or $8,$11,$12
+ sw $8,-8($4)
+ sll $9,$10,$13
+
+ addiu $5,$5,16
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sw $8,-4($4)
+
+.Lend: srl $8,$10,$7
+ j $31
+ sw $8,0($4)
+ .end __gmpn_rshift
diff --git a/rts/gmp/mpn/mips2/sub_n.s b/rts/gmp/mpn/mips2/sub_n.s
new file mode 100644
index 0000000000..51d34f3ac3
--- /dev/null
+++ b/rts/gmp/mpn/mips2/sub_n.s
@@ -0,0 +1,120 @@
+ # MIPS2 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_sub_n
+ .ent __gmpn_sub_n
+__gmpn_sub_n:
+ .set noreorder
+ .set nomacro
+
+ lw $10,0($5)
+ lw $11,0($6)
+
+ addiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ subu $7,$7,$9
+
+.Loop0: addiu $9,$9,-1
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,0($4)
+ or $2,$2,$8
+
+ addiu $5,$5,4
+ addiu $6,$6,4
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ addiu $4,$4,4
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: addiu $7,$7,-4
+
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,0($4)
+ or $2,$2,$8
+
+ lw $10,8($5)
+ addu $13,$13,$2
+ lw $11,8($6)
+ sltu $8,$13,$2
+ subu $13,$12,$13
+ sltu $2,$12,$13
+ sw $13,4($4)
+ or $2,$2,$8
+
+ lw $12,12($5)
+ addu $11,$11,$2
+ lw $13,12($6)
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,8($4)
+ or $2,$2,$8
+
+ lw $10,16($5)
+ addu $13,$13,$2
+ lw $11,16($6)
+ sltu $8,$13,$2
+ subu $13,$12,$13
+ sltu $2,$12,$13
+ sw $13,12($4)
+ or $2,$2,$8
+
+ addiu $5,$5,16
+ addiu $6,$6,16
+
+ bne $7,$0,.Loop
+ addiu $4,$4,16
+
+.Lend: addu $11,$11,$2
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_sub_n
diff --git a/rts/gmp/mpn/mips2/submul_1.s b/rts/gmp/mpn/mips2/submul_1.s
new file mode 100644
index 0000000000..495dea3ba2
--- /dev/null
+++ b/rts/gmp/mpn/mips2/submul_1.s
@@ -0,0 +1,97 @@
+ # MIPS __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+ # subtract the product from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_submul_1
+ .ent __gmpn_submul_1
+__gmpn_submul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ lw $8,0($5)
+
+ # warm up phase 1
+ addiu $5,$5,4
+ multu $8,$7
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC1
+ lw $8,0($5) # load new s1 limb as early as possible
+
+Loop: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addiu $5,$5,4
+ addu $3,$3,$2 # add old carry limb to low product limb
+ multu $8,$7
+ lw $8,0($5) # load new s1 limb as early as possible
+ addiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ subu $3,$10,$3
+ sgtu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ bne $6,$0,Loop
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ multu $8,$7
+ subu $3,$10,$3
+ sgtu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ subu $3,$10,$3
+ sgtu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ j $31
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_submul_1
diff --git a/rts/gmp/mpn/mips2/umul.s b/rts/gmp/mpn/mips2/umul.s
new file mode 100644
index 0000000000..40e847614c
--- /dev/null
+++ b/rts/gmp/mpn/mips2/umul.s
@@ -0,0 +1,30 @@
+ # Copyright (C) 1999 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+ .text
+ .align 2
+ .globl __umul_ppmm
+ .ent __umul_ppmm
+__umul_ppmm:
+ multu $5,$6
+ mflo $3
+ mfhi $2
+ sw $3,0($4)
+ j $31
+ .end __umul_ppmm
diff --git a/rts/gmp/mpn/mips3/README b/rts/gmp/mpn/mips3/README
new file mode 100644
index 0000000000..e94b2c7460
--- /dev/null
+++ b/rts/gmp/mpn/mips3/README
@@ -0,0 +1,23 @@
+This directory contains mpn functions optimized for MIPS3. Example of
+processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. On the R4000 and R4400, branches, both the plain and the "likely" ones,
+ take 3 cycles to execute. (The fastest possible loop will take 4 cycles,
+ because of the delay insn.)
+
+ On the R4600, branches takes a single cycle
+
+ On the R8000, branches often take no noticable cycles, as they are
+ executed in a separate function unit..
+
+2. The R4000 and R4400 have a load latency of 4 cycles.
+
+3. On the R4000 and R4400, multiplies take a data-dependent number of
+ cycles, contrary to the SGI documentation. There seem to be 3 or 4
+ possible latencies.
+
+STATUS
+
+Good...
diff --git a/rts/gmp/mpn/mips3/add_n.s b/rts/gmp/mpn/mips3/add_n.s
new file mode 100644
index 0000000000..adad0beaef
--- /dev/null
+++ b/rts/gmp/mpn/mips3/add_n.s
@@ -0,0 +1,120 @@
+ # MIPS3 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_add_n
+ .ent __gmpn_add_n
+__gmpn_add_n:
+ .set noreorder
+ .set nomacro
+
+ ld $10,0($5)
+ ld $11,0($6)
+
+ daddiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ dsubu $7,$7,$9
+
+.Loop0: daddiu $9,$9,-1
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,0($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,8
+ daddiu $6,$6,8
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ daddiu $4,$4,8
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: daddiu $7,$7,-4
+
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,0($4)
+ or $2,$2,$8
+
+ ld $10,16($5)
+ daddu $13,$13,$2
+ ld $11,16($6)
+ sltu $8,$13,$2
+ daddu $13,$12,$13
+ sltu $2,$13,$12
+ sd $13,8($4)
+ or $2,$2,$8
+
+ ld $12,24($5)
+ daddu $11,$11,$2
+ ld $13,24($6)
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,16($4)
+ or $2,$2,$8
+
+ ld $10,32($5)
+ daddu $13,$13,$2
+ ld $11,32($6)
+ sltu $8,$13,$2
+ daddu $13,$12,$13
+ sltu $2,$13,$12
+ sd $13,24($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,32
+ daddiu $6,$6,32
+
+ bne $7,$0,.Loop
+ daddiu $4,$4,32
+
+.Lend: daddu $11,$11,$2
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_add_n
diff --git a/rts/gmp/mpn/mips3/addmul_1.s b/rts/gmp/mpn/mips3/addmul_1.s
new file mode 100644
index 0000000000..d390e2298e
--- /dev/null
+++ b/rts/gmp/mpn/mips3/addmul_1.s
@@ -0,0 +1,97 @@
+ # MIPS3 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+ # add the product to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_addmul_1
+ .ent __gmpn_addmul_1
+__gmpn_addmul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ ld $8,0($5)
+
+ # warm up phase 1
+ daddiu $5,$5,8
+ dmultu $8,$7
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC1
+ ld $8,0($5) # load new s1 limb as early as possible
+
+Loop: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddiu $5,$5,8
+ daddu $3,$3,$2 # add old carry limb to low product limb
+ dmultu $8,$7
+ ld $8,0($5) # load new s1 limb as early as possible
+ daddiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ daddu $3,$10,$3
+ sltu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ bne $6,$0,Loop
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ dmultu $8,$7
+ daddu $3,$10,$3
+ sltu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ daddu $3,$10,$3
+ sltu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ j $31
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_addmul_1
diff --git a/rts/gmp/mpn/mips3/gmp-mparam.h b/rts/gmp/mpn/mips3/gmp-mparam.h
new file mode 100644
index 0000000000..656e90c7b0
--- /dev/null
+++ b/rts/gmp/mpn/mips3/gmp-mparam.h
@@ -0,0 +1,58 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the R10000 usign the system cc. */
+/* Generated by tuneup.c, 2000-07-25. */
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 16
+#endif
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 32
+#endif
+
+/* Supressed the TOOM3 values as they looked absolutely crazy
+ (698 and 21 respectively) */
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 58
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 54
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 82
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 159
+#endif
diff --git a/rts/gmp/mpn/mips3/lshift.s b/rts/gmp/mpn/mips3/lshift.s
new file mode 100644
index 0000000000..372606fddf
--- /dev/null
+++ b/rts/gmp/mpn/mips3/lshift.s
@@ -0,0 +1,95 @@
+ # MIPS3 __gmpn_lshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_lshift
+ .ent __gmpn_lshift
+__gmpn_lshift:
+ .set noreorder
+ .set nomacro
+
+ dsll $2,$6,3
+ daddu $5,$5,$2 # make r5 point at end of src
+ ld $10,-8($5) # load first limb
+ dsubu $13,$0,$7
+ daddu $4,$4,$2 # make r4 point at end of res
+ daddiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ dsrl $2,$10,$13 # compute function result
+
+ dsubu $6,$6,$9
+
+.Loop0: ld $3,-16($5)
+ daddiu $4,$4,-8
+ daddiu $5,$5,-8
+ daddiu $9,$9,-1
+ dsll $11,$10,$7
+ dsrl $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sd $8,0($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: ld $3,-16($5)
+ daddiu $4,$4,-32
+ daddiu $6,$6,-4
+ dsll $11,$10,$7
+ dsrl $12,$3,$13
+
+ ld $10,-24($5)
+ dsll $14,$3,$7
+ or $8,$11,$12
+ sd $8,24($4)
+ dsrl $9,$10,$13
+
+ ld $3,-32($5)
+ dsll $11,$10,$7
+ or $8,$14,$9
+ sd $8,16($4)
+ dsrl $12,$3,$13
+
+ ld $10,-40($5)
+ dsll $14,$3,$7
+ or $8,$11,$12
+ sd $8,8($4)
+ dsrl $9,$10,$13
+
+ daddiu $5,$5,-32
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sd $8,0($4)
+
+.Lend: dsll $8,$10,$7
+ j $31
+ sd $8,-8($4)
+ .end __gmpn_lshift
diff --git a/rts/gmp/mpn/mips3/mul_1.s b/rts/gmp/mpn/mips3/mul_1.s
new file mode 100644
index 0000000000..6659e2b4eb
--- /dev/null
+++ b/rts/gmp/mpn/mips3/mul_1.s
@@ -0,0 +1,85 @@
+ # MIPS3 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+ # store the product in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_mul_1
+ .ent __gmpn_mul_1
+__gmpn_mul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ ld $8,0($5)
+
+ # warm up phase 1
+ daddiu $5,$5,8
+ dmultu $8,$7
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC1
+ ld $8,0($5) # load new s1 limb as early as possible
+
+Loop: mflo $10
+ mfhi $9
+ daddiu $5,$5,8
+ daddu $10,$10,$2 # add old carry limb to low product limb
+ dmultu $8,$7
+ ld $8,0($5) # load new s1 limb as early as possible
+ daddiu $6,$6,-1 # decrement loop counter
+ sltu $2,$10,$2 # carry from previous addition -> $2
+ sd $10,0($4)
+ daddiu $4,$4,8
+ bne $6,$0,Loop
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: mflo $10
+ mfhi $9
+ daddu $10,$10,$2
+ sltu $2,$10,$2
+ dmultu $8,$7
+ sd $10,0($4)
+ daddiu $4,$4,8
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: mflo $10
+ mfhi $9
+ daddu $10,$10,$2
+ sltu $2,$10,$2
+ sd $10,0($4)
+ j $31
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_mul_1
diff --git a/rts/gmp/mpn/mips3/rshift.s b/rts/gmp/mpn/mips3/rshift.s
new file mode 100644
index 0000000000..59c7fd3492
--- /dev/null
+++ b/rts/gmp/mpn/mips3/rshift.s
@@ -0,0 +1,92 @@
+ # MIPS3 __gmpn_rshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_rshift
+ .ent __gmpn_rshift
+__gmpn_rshift:
+ .set noreorder
+ .set nomacro
+
+ ld $10,0($5) # load first limb
+ dsubu $13,$0,$7
+ daddiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ dsll $2,$10,$13 # compute function result
+
+ dsubu $6,$6,$9
+
+.Loop0: ld $3,8($5)
+ daddiu $4,$4,8
+ daddiu $5,$5,8
+ daddiu $9,$9,-1
+ dsrl $11,$10,$7
+ dsll $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sd $8,-8($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: ld $3,8($5)
+ daddiu $4,$4,32
+ daddiu $6,$6,-4
+ dsrl $11,$10,$7
+ dsll $12,$3,$13
+
+ ld $10,16($5)
+ dsrl $14,$3,$7
+ or $8,$11,$12
+ sd $8,-32($4)
+ dsll $9,$10,$13
+
+ ld $3,24($5)
+ dsrl $11,$10,$7
+ or $8,$14,$9
+ sd $8,-24($4)
+ dsll $12,$3,$13
+
+ ld $10,32($5)
+ dsrl $14,$3,$7
+ or $8,$11,$12
+ sd $8,-16($4)
+ dsll $9,$10,$13
+
+ daddiu $5,$5,32
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sd $8,-8($4)
+
+.Lend: dsrl $8,$10,$7
+ j $31
+ sd $8,0($4)
+ .end __gmpn_rshift
diff --git a/rts/gmp/mpn/mips3/sub_n.s b/rts/gmp/mpn/mips3/sub_n.s
new file mode 100644
index 0000000000..c57c824b04
--- /dev/null
+++ b/rts/gmp/mpn/mips3/sub_n.s
@@ -0,0 +1,120 @@
+ # MIPS3 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_sub_n
+ .ent __gmpn_sub_n
+__gmpn_sub_n:
+ .set noreorder
+ .set nomacro
+
+ ld $10,0($5)
+ ld $11,0($6)
+
+ daddiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ dsubu $7,$7,$9
+
+.Loop0: daddiu $9,$9,-1
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,0($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,8
+ daddiu $6,$6,8
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ daddiu $4,$4,8
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: daddiu $7,$7,-4
+
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,0($4)
+ or $2,$2,$8
+
+ ld $10,16($5)
+ daddu $13,$13,$2
+ ld $11,16($6)
+ sltu $8,$13,$2
+ dsubu $13,$12,$13
+ sltu $2,$12,$13
+ sd $13,8($4)
+ or $2,$2,$8
+
+ ld $12,24($5)
+ daddu $11,$11,$2
+ ld $13,24($6)
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,16($4)
+ or $2,$2,$8
+
+ ld $10,32($5)
+ daddu $13,$13,$2
+ ld $11,32($6)
+ sltu $8,$13,$2
+ dsubu $13,$12,$13
+ sltu $2,$12,$13
+ sd $13,24($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,32
+ daddiu $6,$6,32
+
+ bne $7,$0,.Loop
+ daddiu $4,$4,32
+
+.Lend: daddu $11,$11,$2
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_sub_n
diff --git a/rts/gmp/mpn/mips3/submul_1.s b/rts/gmp/mpn/mips3/submul_1.s
new file mode 100644
index 0000000000..531f9705a6
--- /dev/null
+++ b/rts/gmp/mpn/mips3/submul_1.s
@@ -0,0 +1,97 @@
+ # MIPS3 __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+ # subtract the product from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_submul_1
+ .ent __gmpn_submul_1
+__gmpn_submul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ ld $8,0($5)
+
+ # warm up phase 1
+ daddiu $5,$5,8
+ dmultu $8,$7
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC1
+ ld $8,0($5) # load new s1 limb as early as possible
+
+Loop: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddiu $5,$5,8
+ daddu $3,$3,$2 # add old carry limb to low product limb
+ dmultu $8,$7
+ ld $8,0($5) # load new s1 limb as early as possible
+ daddiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ dsubu $3,$10,$3
+ sgtu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ bne $6,$0,Loop
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ dmultu $8,$7
+ dsubu $3,$10,$3
+ sgtu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ dsubu $3,$10,$3
+ sgtu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ j $31
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_submul_1
diff --git a/rts/gmp/mpn/mp_bases.c b/rts/gmp/mpn/mp_bases.c
new file mode 100644
index 0000000000..011c328c80
--- /dev/null
+++ b/rts/gmp/mpn/mp_bases.c
@@ -0,0 +1,550 @@
+/* __mp_bases -- Structure for conversion between internal binary
+ format and strings in base 2..255. The fields are explained in
+ gmp-impl.h.
+
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if BITS_PER_MP_LIMB == 32
+const struct bases __mp_bases[256] =
+{
+ /* 0 */ {0, 0.0, 0, 0},
+ /* 1 */ {0, 1e38, 0, 0},
+ /* 2 */ {32, 1.0000000000000000, 0x1, 0x0},
+ /* 3 */ {20, 0.6309297535714575, 0xcfd41b91, 0x3b563c24},
+ /* 4 */ {16, 0.5000000000000000, 0x2, 0x0},
+ /* 5 */ {13, 0.4306765580733931, 0x48c27395, 0xc25c2684},
+ /* 6 */ {12, 0.3868528072345416, 0x81bf1000, 0xf91bd1b6},
+ /* 7 */ {11, 0.3562071871080222, 0x75db9c97, 0x1607a2cb},
+ /* 8 */ {10, 0.3333333333333334, 0x3, 0x0},
+ /* 9 */ {10, 0.3154648767857287, 0xcfd41b91, 0x3b563c24},
+ /* 10 */ {9, 0.3010299956639811, 0x3b9aca00, 0x12e0be82},
+ /* 11 */ {9, 0.2890648263178878, 0x8c8b6d2b, 0xd24cde04},
+ /* 12 */ {8, 0.2789429456511298, 0x19a10000, 0x3fa39ab5},
+ /* 13 */ {8, 0.2702381544273197, 0x309f1021, 0x50f8ac5f},
+ /* 14 */ {8, 0.2626495350371936, 0x57f6c100, 0x74843b1e},
+ /* 15 */ {8, 0.2559580248098155, 0x98c29b81, 0xad0326c2},
+ /* 16 */ {8, 0.2500000000000000, 0x4, 0x0},
+ /* 17 */ {7, 0.2446505421182260, 0x18754571, 0x4ef0b6bd},
+ /* 18 */ {7, 0.2398124665681315, 0x247dbc80, 0xc0fc48a1},
+ /* 19 */ {7, 0.2354089133666382, 0x3547667b, 0x33838942},
+ /* 20 */ {7, 0.2313782131597592, 0x4c4b4000, 0xad7f29ab},
+ /* 21 */ {7, 0.2276702486969530, 0x6b5a6e1d, 0x313c3d15},
+ /* 22 */ {7, 0.2242438242175754, 0x94ace180, 0xb8cca9e0},
+ /* 23 */ {7, 0.2210647294575037, 0xcaf18367, 0x42ed6de9},
+ /* 24 */ {6, 0.2181042919855316, 0xb640000, 0x67980e0b},
+ /* 25 */ {6, 0.2153382790366965, 0xe8d4a51, 0x19799812},
+ /* 26 */ {6, 0.2127460535533632, 0x1269ae40, 0xbce85396},
+ /* 27 */ {6, 0.2103099178571525, 0x17179149, 0x62c103a9},
+ /* 28 */ {6, 0.2080145976765095, 0x1cb91000, 0x1d353d43},
+ /* 29 */ {6, 0.2058468324604344, 0x23744899, 0xce1decea},
+ /* 30 */ {6, 0.2037950470905062, 0x2b73a840, 0x790fc511},
+ /* 31 */ {6, 0.2018490865820999, 0x34e63b41, 0x35b865a0},
+ /* 32 */ {6, 0.2000000000000000, 0x5, 0x0},
+ /* 33 */ {6, 0.1982398631705605, 0x4cfa3cc1, 0xa9aed1b3},
+ /* 34 */ {6, 0.1965616322328226, 0x5c13d840, 0x63dfc229},
+ /* 35 */ {6, 0.1949590218937863, 0x6d91b519, 0x2b0fee30},
+ /* 36 */ {6, 0.1934264036172708, 0x81bf1000, 0xf91bd1b6},
+ /* 37 */ {6, 0.1919587200065601, 0x98ede0c9, 0xac89c3a9},
+ /* 38 */ {6, 0.1905514124267734, 0xb3773e40, 0x6d2c32fe},
+ /* 39 */ {6, 0.1892003595168700, 0xd1bbc4d1, 0x387907c9},
+ /* 40 */ {6, 0.1879018247091076, 0xf4240000, 0xc6f7a0b},
+ /* 41 */ {5, 0.1866524112389434, 0x6e7d349, 0x28928154},
+ /* 42 */ {5, 0.1854490234153689, 0x7ca30a0, 0x6e8629d},
+ /* 43 */ {5, 0.1842888331487062, 0x8c32bbb, 0xd373dca0},
+ /* 44 */ {5, 0.1831692509136336, 0x9d46c00, 0xa0b17895},
+ /* 45 */ {5, 0.1820879004699383, 0xaffacfd, 0x746811a5},
+ /* 46 */ {5, 0.1810425967800402, 0xc46bee0, 0x4da6500f},
+ /* 47 */ {5, 0.1800313266566926, 0xdab86ef, 0x2ba23582},
+ /* 48 */ {5, 0.1790522317510414, 0xf300000, 0xdb20a88},
+ /* 49 */ {5, 0.1781035935540111, 0x10d63af1, 0xe68d5ce4},
+ /* 50 */ {5, 0.1771838201355579, 0x12a05f20, 0xb7cdfd9d},
+ /* 51 */ {5, 0.1762914343888821, 0x1490aae3, 0x8e583933},
+ /* 52 */ {5, 0.1754250635819545, 0x16a97400, 0x697cc3ea},
+ /* 53 */ {5, 0.1745834300480449, 0x18ed2825, 0x48a5ca6c},
+ /* 54 */ {5, 0.1737653428714400, 0x1b5e4d60, 0x2b52db16},
+ /* 55 */ {5, 0.1729696904450771, 0x1dff8297, 0x111586a6},
+ /* 56 */ {5, 0.1721954337940981, 0x20d38000, 0xf31d2b36},
+ /* 57 */ {5, 0.1714416005739134, 0x23dd1799, 0xc8d76d19},
+ /* 58 */ {5, 0.1707072796637201, 0x271f35a0, 0xa2cb1eb4},
+ /* 59 */ {5, 0.1699916162869140, 0x2a9ce10b, 0x807c3ec3},
+ /* 60 */ {5, 0.1692938075987814, 0x2e593c00, 0x617ec8bf},
+ /* 61 */ {5, 0.1686130986895011, 0x3257844d, 0x45746cbe},
+ /* 62 */ {5, 0.1679487789570419, 0x369b13e0, 0x2c0aa273},
+ /* 63 */ {5, 0.1673001788101741, 0x3b27613f, 0x14f90805},
+ /* 64 */ {5, 0.1666666666666667, 0x6, 0x0},
+ /* 65 */ {5, 0.1660476462159378, 0x4528a141, 0xd9cf0829},
+ /* 66 */ {5, 0.1654425539190583, 0x4aa51420, 0xb6fc4841},
+ /* 67 */ {5, 0.1648508567221604, 0x50794633, 0x973054cb},
+ /* 68 */ {5, 0.1642720499620502, 0x56a94400, 0x7a1dbe4b},
+ /* 69 */ {5, 0.1637056554452156, 0x5d393975, 0x5f7fcd7f},
+ /* 70 */ {5, 0.1631512196835108, 0x642d7260, 0x47196c84},
+ /* 71 */ {5, 0.1626083122716341, 0x6b8a5ae7, 0x30b43635},
+ /* 72 */ {5, 0.1620765243931223, 0x73548000, 0x1c1fa5f6},
+ /* 73 */ {5, 0.1615554674429964, 0x7b908fe9, 0x930634a},
+ /* 74 */ {5, 0.1610447717564445, 0x84435aa0, 0xef7f4a3c},
+ /* 75 */ {5, 0.1605440854340214, 0x8d71d25b, 0xcf5552d2},
+ /* 76 */ {5, 0.1600530732548213, 0x97210c00, 0xb1a47c8e},
+ /* 77 */ {5, 0.1595714156699382, 0xa1563f9d, 0x9634b43e},
+ /* 78 */ {5, 0.1590988078692941, 0xac16c8e0, 0x7cd3817d},
+ /* 79 */ {5, 0.1586349589155960, 0xb768278f, 0x65536761},
+ /* 80 */ {5, 0.1581795909397823, 0xc3500000, 0x4f8b588e},
+ /* 81 */ {5, 0.1577324383928644, 0xcfd41b91, 0x3b563c24},
+ /* 82 */ {5, 0.1572932473495469, 0xdcfa6920, 0x28928154},
+ /* 83 */ {5, 0.1568617748594410, 0xeac8fd83, 0x1721bfb0},
+ /* 84 */ {5, 0.1564377883420716, 0xf9461400, 0x6e8629d},
+ /* 85 */ {4, 0.1560210650222250, 0x31c84b1, 0x491cc17c},
+ /* 86 */ {4, 0.1556113914024940, 0x342ab10, 0x3a11d83b},
+ /* 87 */ {4, 0.1552085627701551, 0x36a2c21, 0x2be074cd},
+ /* 88 */ {4, 0.1548123827357682, 0x3931000, 0x1e7a02e7},
+ /* 89 */ {4, 0.1544226628011101, 0x3bd5ee1, 0x11d10edd},
+ /* 90 */ {4, 0.1540392219542636, 0x3e92110, 0x5d92c68},
+ /* 91 */ {4, 0.1536618862898642, 0x4165ef1, 0xf50dbfb2},
+ /* 92 */ {4, 0.1532904886526781, 0x4452100, 0xdf9f1316},
+ /* 93 */ {4, 0.1529248683028321, 0x4756fd1, 0xcb52a684},
+ /* 94 */ {4, 0.1525648706011593, 0x4a75410, 0xb8163e97},
+ /* 95 */ {4, 0.1522103467132434, 0x4dad681, 0xa5d8f269},
+ /* 96 */ {4, 0.1518611533308632, 0x5100000, 0x948b0fcd},
+ /* 97 */ {4, 0.1515171524096389, 0x546d981, 0x841e0215},
+ /* 98 */ {4, 0.1511782109217764, 0x57f6c10, 0x74843b1e},
+ /* 99 */ {4, 0.1508442006228941, 0x5b9c0d1, 0x65b11e6e},
+ /* 100 */ {4, 0.1505149978319906, 0x5f5e100, 0x5798ee23},
+ /* 101 */ {4, 0.1501904832236879, 0x633d5f1, 0x4a30b99b},
+ /* 102 */ {4, 0.1498705416319474, 0x673a910, 0x3d6e4d94},
+ /* 103 */ {4, 0.1495550618645152, 0x6b563e1, 0x314825b0},
+ /* 104 */ {4, 0.1492439365274121, 0x6f91000, 0x25b55f2e},
+ /* 105 */ {4, 0.1489370618588283, 0x73eb721, 0x1aadaccb},
+ /* 106 */ {4, 0.1486343375718350, 0x7866310, 0x10294ba2},
+ /* 107 */ {4, 0.1483356667053617, 0x7d01db1, 0x620f8f6},
+ /* 108 */ {4, 0.1480409554829326, 0x81bf100, 0xf91bd1b6},
+ /* 109 */ {4, 0.1477501131786861, 0x869e711, 0xe6d37b2a},
+ /* 110 */ {4, 0.1474630519902391, 0x8ba0a10, 0xd55cff6e},
+ /* 111 */ {4, 0.1471796869179852, 0x90c6441, 0xc4ad2db2},
+ /* 112 */ {4, 0.1468999356504447, 0x9610000, 0xb4b985cf},
+ /* 113 */ {4, 0.1466237184553111, 0x9b7e7c1, 0xa5782bef},
+ /* 114 */ {4, 0.1463509580758620, 0xa112610, 0x96dfdd2a},
+ /* 115 */ {4, 0.1460815796324244, 0xa6cc591, 0x88e7e509},
+ /* 116 */ {4, 0.1458155105286054, 0xacad100, 0x7b8813d3},
+ /* 117 */ {4, 0.1455526803620167, 0xb2b5331, 0x6eb8b595},
+ /* 118 */ {4, 0.1452930208392428, 0xb8e5710, 0x627289db},
+ /* 119 */ {4, 0.1450364656948130, 0xbf3e7a1, 0x56aebc07},
+ /* 120 */ {4, 0.1447829506139581, 0xc5c1000, 0x4b66dc33},
+ /* 121 */ {4, 0.1445324131589439, 0xcc6db61, 0x4094d8a3},
+ /* 122 */ {4, 0.1442847926987864, 0xd345510, 0x3632f7a5},
+ /* 123 */ {4, 0.1440400303421672, 0xda48871, 0x2c3bd1f0},
+ /* 124 */ {4, 0.1437980688733775, 0xe178100, 0x22aa4d5f},
+ /* 125 */ {4, 0.1435588526911310, 0xe8d4a51, 0x19799812},
+ /* 126 */ {4, 0.1433223277500932, 0xf05f010, 0x10a523e5},
+ /* 127 */ {4, 0.1430884415049874, 0xf817e01, 0x828a237},
+ /* 128 */ {4, 0.1428571428571428, 0x7, 0x0},
+ /* 129 */ {4, 0.1426283821033600, 0x10818201, 0xf04ec452},
+ /* 130 */ {4, 0.1424021108869747, 0x11061010, 0xe136444a},
+ /* 131 */ {4, 0.1421782821510107, 0x118db651, 0xd2af9589},
+ /* 132 */ {4, 0.1419568500933153, 0x12188100, 0xc4b42a83},
+ /* 133 */ {4, 0.1417377701235801, 0x12a67c71, 0xb73dccf5},
+ /* 134 */ {4, 0.1415209988221527, 0x1337b510, 0xaa4698c5},
+ /* 135 */ {4, 0.1413064939005528, 0x13cc3761, 0x9dc8f729},
+ /* 136 */ {4, 0.1410942141636095, 0x14641000, 0x91bf9a30},
+ /* 137 */ {4, 0.1408841194731412, 0x14ff4ba1, 0x86257887},
+ /* 138 */ {4, 0.1406761707131039, 0x159df710, 0x7af5c98c},
+ /* 139 */ {4, 0.1404703297561400, 0x16401f31, 0x702c01a0},
+ /* 140 */ {4, 0.1402665594314587, 0x16e5d100, 0x65c3ceb1},
+ /* 141 */ {4, 0.1400648234939879, 0x178f1991, 0x5bb91502},
+ /* 142 */ {4, 0.1398650865947379, 0x183c0610, 0x5207ec23},
+ /* 143 */ {4, 0.1396673142523192, 0x18eca3c1, 0x48ac9c19},
+ /* 144 */ {4, 0.1394714728255649, 0x19a10000, 0x3fa39ab5},
+ /* 145 */ {4, 0.1392775294872041, 0x1a592841, 0x36e98912},
+ /* 146 */ {4, 0.1390854521985406, 0x1b152a10, 0x2e7b3140},
+ /* 147 */ {4, 0.1388952096850913, 0x1bd51311, 0x2655840b},
+ /* 148 */ {4, 0.1387067714131417, 0x1c98f100, 0x1e7596ea},
+ /* 149 */ {4, 0.1385201075671774, 0x1d60d1b1, 0x16d8a20d},
+ /* 150 */ {4, 0.1383351890281539, 0x1e2cc310, 0xf7bfe87},
+ /* 151 */ {4, 0.1381519873525671, 0x1efcd321, 0x85d2492},
+ /* 152 */ {4, 0.1379704747522905, 0x1fd11000, 0x179a9f4},
+ /* 153 */ {4, 0.1377906240751463, 0x20a987e1, 0xf59e80eb},
+ /* 154 */ {4, 0.1376124087861776, 0x21864910, 0xe8b768db},
+ /* 155 */ {4, 0.1374358029495937, 0x226761f1, 0xdc39d6d5},
+ /* 156 */ {4, 0.1372607812113589, 0x234ce100, 0xd021c5d1},
+ /* 157 */ {4, 0.1370873187823978, 0x2436d4d1, 0xc46b5e37},
+ /* 158 */ {4, 0.1369153914223921, 0x25254c10, 0xb912f39c},
+ /* 159 */ {4, 0.1367449754241439, 0x26185581, 0xae150294},
+ /* 160 */ {4, 0.1365760475984821, 0x27100000, 0xa36e2eb1},
+ /* 161 */ {4, 0.1364085852596902, 0x280c5a81, 0x991b4094},
+ /* 162 */ {4, 0.1362425662114337, 0x290d7410, 0x8f19241e},
+ /* 163 */ {4, 0.1360779687331669, 0x2a135bd1, 0x8564e6b7},
+ /* 164 */ {4, 0.1359147715670014, 0x2b1e2100, 0x7bfbb5b4},
+ /* 165 */ {4, 0.1357529539050150, 0x2c2dd2f1, 0x72dadcc8},
+ /* 166 */ {4, 0.1355924953769863, 0x2d428110, 0x69ffc498},
+ /* 167 */ {4, 0.1354333760385373, 0x2e5c3ae1, 0x6167f154},
+ /* 168 */ {4, 0.1352755763596663, 0x2f7b1000, 0x5911016e},
+ /* 169 */ {4, 0.1351190772136599, 0x309f1021, 0x50f8ac5f},
+ /* 170 */ {4, 0.1349638598663645, 0x31c84b10, 0x491cc17c},
+ /* 171 */ {4, 0.1348099059658079, 0x32f6d0b1, 0x417b26d8},
+ /* 172 */ {4, 0.1346571975321549, 0x342ab100, 0x3a11d83b},
+ /* 173 */ {4, 0.1345057169479844, 0x3563fc11, 0x32dee622},
+ /* 174 */ {4, 0.1343554469488779, 0x36a2c210, 0x2be074cd},
+ /* 175 */ {4, 0.1342063706143054, 0x37e71341, 0x2514bb58},
+ /* 176 */ {4, 0.1340584713587980, 0x39310000, 0x1e7a02e7},
+ /* 177 */ {4, 0.1339117329233981, 0x3a8098c1, 0x180ea5d0},
+ /* 178 */ {4, 0.1337661393673756, 0x3bd5ee10, 0x11d10edd},
+ /* 179 */ {4, 0.1336216750601996, 0x3d311091, 0xbbfb88e},
+ /* 180 */ {4, 0.1334783246737591, 0x3e921100, 0x5d92c68},
+ /* 181 */ {4, 0.1333360731748201, 0x3ff90031, 0x1c024c},
+ /* 182 */ {4, 0.1331949058177136, 0x4165ef10, 0xf50dbfb2},
+ /* 183 */ {4, 0.1330548081372441, 0x42d8eea1, 0xea30efa3},
+ /* 184 */ {4, 0.1329157659418126, 0x44521000, 0xdf9f1316},
+ /* 185 */ {4, 0.1327777653067443, 0x45d16461, 0xd555c0c9},
+ /* 186 */ {4, 0.1326407925678156, 0x4756fd10, 0xcb52a684},
+ /* 187 */ {4, 0.1325048343149731, 0x48e2eb71, 0xc193881f},
+ /* 188 */ {4, 0.1323698773862368, 0x4a754100, 0xb8163e97},
+ /* 189 */ {4, 0.1322359088617821, 0x4c0e0f51, 0xaed8b724},
+ /* 190 */ {4, 0.1321029160581950, 0x4dad6810, 0xa5d8f269},
+ /* 191 */ {4, 0.1319708865228925, 0x4f535d01, 0x9d15039d},
+ /* 192 */ {4, 0.1318398080287045, 0x51000000, 0x948b0fcd},
+ /* 193 */ {4, 0.1317096685686114, 0x52b36301, 0x8c394d1d},
+ /* 194 */ {4, 0.1315804563506306, 0x546d9810, 0x841e0215},
+ /* 195 */ {4, 0.1314521597928493, 0x562eb151, 0x7c3784f8},
+ /* 196 */ {4, 0.1313247675185968, 0x57f6c100, 0x74843b1e},
+ /* 197 */ {4, 0.1311982683517524, 0x59c5d971, 0x6d02985d},
+ /* 198 */ {4, 0.1310726513121843, 0x5b9c0d10, 0x65b11e6e},
+ /* 199 */ {4, 0.1309479056113158, 0x5d796e61, 0x5e8e5c64},
+ /* 200 */ {4, 0.1308240206478128, 0x5f5e1000, 0x5798ee23},
+ /* 201 */ {4, 0.1307009860033912, 0x614a04a1, 0x50cf7bde},
+ /* 202 */ {4, 0.1305787914387386, 0x633d5f10, 0x4a30b99b},
+ /* 203 */ {4, 0.1304574268895465, 0x65383231, 0x43bb66bd},
+ /* 204 */ {4, 0.1303368824626505, 0x673a9100, 0x3d6e4d94},
+ /* 205 */ {4, 0.1302171484322746, 0x69448e91, 0x374842ee},
+ /* 206 */ {4, 0.1300982152363760, 0x6b563e10, 0x314825b0},
+ /* 207 */ {4, 0.1299800734730872, 0x6d6fb2c1, 0x2b6cde75},
+ /* 208 */ {4, 0.1298627138972530, 0x6f910000, 0x25b55f2e},
+ /* 209 */ {4, 0.1297461274170591, 0x71ba3941, 0x2020a2c5},
+ /* 210 */ {4, 0.1296303050907487, 0x73eb7210, 0x1aadaccb},
+ /* 211 */ {4, 0.1295152381234257, 0x7624be11, 0x155b891f},
+ /* 212 */ {4, 0.1294009178639407, 0x78663100, 0x10294ba2},
+ /* 213 */ {4, 0.1292873358018581, 0x7aafdeb1, 0xb160fe9},
+ /* 214 */ {4, 0.1291744835645007, 0x7d01db10, 0x620f8f6},
+ /* 215 */ {4, 0.1290623529140715, 0x7f5c3a21, 0x14930ef},
+ /* 216 */ {4, 0.1289509357448472, 0x81bf1000, 0xf91bd1b6},
+ /* 217 */ {4, 0.1288402240804449, 0x842a70e1, 0xefdcb0c7},
+ /* 218 */ {4, 0.1287302100711567, 0x869e7110, 0xe6d37b2a},
+ /* 219 */ {4, 0.1286208859913518, 0x891b24f1, 0xddfeb94a},
+ /* 220 */ {4, 0.1285122442369443, 0x8ba0a100, 0xd55cff6e},
+ /* 221 */ {4, 0.1284042773229231, 0x8e2ef9d1, 0xcceced50},
+ /* 222 */ {4, 0.1282969778809442, 0x90c64410, 0xc4ad2db2},
+ /* 223 */ {4, 0.1281903386569819, 0x93669481, 0xbc9c75f9},
+ /* 224 */ {4, 0.1280843525090381, 0x96100000, 0xb4b985cf},
+ /* 225 */ {4, 0.1279790124049077, 0x98c29b81, 0xad0326c2},
+ /* 226 */ {4, 0.1278743114199984, 0x9b7e7c10, 0xa5782bef},
+ /* 227 */ {4, 0.1277702427352035, 0x9e43b6d1, 0x9e1771a9},
+ /* 228 */ {4, 0.1276667996348261, 0xa1126100, 0x96dfdd2a},
+ /* 229 */ {4, 0.1275639755045533, 0xa3ea8ff1, 0x8fd05c41},
+ /* 230 */ {4, 0.1274617638294791, 0xa6cc5910, 0x88e7e509},
+ /* 231 */ {4, 0.1273601581921741, 0xa9b7d1e1, 0x8225759d},
+ /* 232 */ {4, 0.1272591522708010, 0xacad1000, 0x7b8813d3},
+ /* 233 */ {4, 0.1271587398372755, 0xafac2921, 0x750eccf9},
+ /* 234 */ {4, 0.1270589147554692, 0xb2b53310, 0x6eb8b595},
+ /* 235 */ {4, 0.1269596709794558, 0xb5c843b1, 0x6884e923},
+ /* 236 */ {4, 0.1268610025517973, 0xb8e57100, 0x627289db},
+ /* 237 */ {4, 0.1267629036018709, 0xbc0cd111, 0x5c80c07b},
+ /* 238 */ {4, 0.1266653683442337, 0xbf3e7a10, 0x56aebc07},
+ /* 239 */ {4, 0.1265683910770258, 0xc27a8241, 0x50fbb19b},
+ /* 240 */ {4, 0.1264719661804097, 0xc5c10000, 0x4b66dc33},
+ /* 241 */ {4, 0.1263760881150453, 0xc91209c1, 0x45ef7c7c},
+ /* 242 */ {4, 0.1262807514205999, 0xcc6db610, 0x4094d8a3},
+ /* 243 */ {4, 0.1261859507142915, 0xcfd41b91, 0x3b563c24},
+ /* 244 */ {4, 0.1260916806894653, 0xd3455100, 0x3632f7a5},
+ /* 245 */ {4, 0.1259979361142023, 0xd6c16d31, 0x312a60c3},
+ /* 246 */ {4, 0.1259047118299582, 0xda488710, 0x2c3bd1f0},
+ /* 247 */ {4, 0.1258120027502338, 0xdddab5a1, 0x2766aa45},
+ /* 248 */ {4, 0.1257198038592741, 0xe1781000, 0x22aa4d5f},
+ /* 249 */ {4, 0.1256281102107963, 0xe520ad61, 0x1e06233c},
+ /* 250 */ {4, 0.1255369169267456, 0xe8d4a510, 0x19799812},
+ /* 251 */ {4, 0.1254462191960791, 0xec940e71, 0x15041c33},
+ /* 252 */ {4, 0.1253560122735751, 0xf05f0100, 0x10a523e5},
+ /* 253 */ {4, 0.1252662914786691, 0xf4359451, 0xc5c2749},
+ /* 254 */ {4, 0.1251770521943144, 0xf817e010, 0x828a237},
+ /* 255 */ {4, 0.1250882898658681, 0xfc05fc01, 0x40a1423},
+};
+#endif
+#if BITS_PER_MP_LIMB == 64
+const struct bases __mp_bases[256] =
+{
+ /* 0 */ {0, 0.0, 0, 0},
+ /* 1 */ {0, 1e38, 0, 0},
+ /* 2 */ {64, 1.0000000000000000, CNST_LIMB(0x1), CNST_LIMB(0x0)},
+ /* 3 */ {40, 0.6309297535714574, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 4 */ {32, 0.5000000000000000, CNST_LIMB(0x2), CNST_LIMB(0x0)},
+ /* 5 */ {27, 0.4306765580733931, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
+ /* 6 */ {24, 0.3868528072345416, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+ /* 7 */ {22, 0.3562071871080222, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
+ /* 8 */ {21, 0.3333333333333334, CNST_LIMB(0x3), CNST_LIMB(0x0)},
+ /* 9 */ {20, 0.3154648767857287, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 10 */ {19, 0.3010299956639811, CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a)},
+ /* 11 */ {18, 0.2890648263178878, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
+ /* 12 */ {17, 0.2789429456511298, CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f)},
+ /* 13 */ {17, 0.2702381544273197, CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3)},
+ /* 14 */ {16, 0.2626495350371936, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
+ /* 15 */ {16, 0.2559580248098155, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
+ /* 16 */ {16, 0.2500000000000000, CNST_LIMB(0x4), CNST_LIMB(0x0)},
+ /* 17 */ {15, 0.2446505421182260, CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323)},
+ /* 18 */ {15, 0.2398124665681315, CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c)},
+ /* 19 */ {15, 0.2354089133666382, CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec)},
+ /* 20 */ {14, 0.2313782131597592, CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e)},
+ /* 21 */ {14, 0.2276702486969530, CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239)},
+ /* 22 */ {14, 0.2242438242175754, CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08)},
+ /* 23 */ {14, 0.2210647294575037, CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38)},
+ /* 24 */ {13, 0.2181042919855316, CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1)},
+ /* 25 */ {13, 0.2153382790366965, CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5)},
+ /* 26 */ {13, 0.2127460535533632, CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9)},
+ /* 27 */ {13, 0.2103099178571525, CNST_LIMB(0x383d9170b85ff80b), CNST_LIMB(0x2351ffcaa9c7c4ae)},
+ /* 28 */ {13, 0.2080145976765095, CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636)},
+ /* 29 */ {13, 0.2058468324604344, CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d)},
+ /* 30 */ {13, 0.2037950470905062, CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6)},
+ /* 31 */ {12, 0.2018490865820999, CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5)},
+ /* 32 */ {12, 0.2000000000000000, CNST_LIMB(0x5), CNST_LIMB(0x0)},
+ /* 33 */ {12, 0.1982398631705605, CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f)},
+ /* 34 */ {12, 0.1965616322328226, CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec)},
+ /* 35 */ {12, 0.1949590218937863, CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509)},
+ /* 36 */ {12, 0.1934264036172708, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+ /* 37 */ {12, 0.1919587200065601, CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f)},
+ /* 38 */ {12, 0.1905514124267734, CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544)},
+ /* 39 */ {12, 0.1892003595168700, CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c)},
+ /* 40 */ {12, 0.1879018247091076, CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197)},
+ /* 41 */ {11, 0.1866524112389434, CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae)},
+ /* 42 */ {11, 0.1854490234153689, CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7)},
+ /* 43 */ {11, 0.1842888331487062, CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94)},
+ /* 44 */ {11, 0.1831692509136336, CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a)},
+ /* 45 */ {11, 0.1820879004699383, CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46)},
+ /* 46 */ {11, 0.1810425967800402, CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083)},
+ /* 47 */ {11, 0.1800313266566926, CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988)},
+ /* 48 */ {11, 0.1790522317510413, CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167)},
+ /* 49 */ {11, 0.1781035935540111, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
+ /* 50 */ {11, 0.1771838201355579, CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962)},
+ /* 51 */ {11, 0.1762914343888821, CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e)},
+ /* 52 */ {11, 0.1754250635819545, CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d)},
+ /* 53 */ {11, 0.1745834300480449, CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25)},
+ /* 54 */ {11, 0.1737653428714400, CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a)},
+ /* 55 */ {11, 0.1729696904450771, CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d)},
+ /* 56 */ {11, 0.1721954337940981, CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1)},
+ /* 57 */ {10, 0.1714416005739134, CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1)},
+ /* 58 */ {10, 0.1707072796637201, CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c)},
+ /* 59 */ {10, 0.1699916162869140, CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492)},
+ /* 60 */ {10, 0.1692938075987814, CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5)},
+ /* 61 */ {10, 0.1686130986895011, CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416)},
+ /* 62 */ {10, 0.1679487789570419, CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5)},
+ /* 63 */ {10, 0.1673001788101741, CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641)},
+ /* 64 */ {10, 0.1666666666666667, CNST_LIMB(0x6), CNST_LIMB(0x0)},
+ /* 65 */ {10, 0.1660476462159378, CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1)},
+ /* 66 */ {10, 0.1654425539190583, CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33)},
+ /* 67 */ {10, 0.1648508567221604, CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0)},
+ /* 68 */ {10, 0.1642720499620502, CNST_LIMB(0x1d56299ada100000), CNST_LIMB(0x173decb64d1d4409)},
+ /* 69 */ {10, 0.1637056554452156, CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f)},
+ /* 70 */ {10, 0.1631512196835108, CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62)},
+ /* 71 */ {10, 0.1626083122716341, CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2)},
+ /* 72 */ {10, 0.1620765243931223, CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2)},
+ /* 73 */ {10, 0.1615554674429964, CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b)},
+ /* 74 */ {10, 0.1610447717564444, CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5)},
+ /* 75 */ {10, 0.1605440854340214, CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e)},
+ /* 76 */ {10, 0.1600530732548213, CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd)},
+ /* 77 */ {10, 0.1595714156699382, CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec)},
+ /* 78 */ {10, 0.1590988078692941, CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505)},
+ /* 79 */ {10, 0.1586349589155960, CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac)},
+ /* 80 */ {10, 0.1581795909397823, CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d)},
+ /* 81 */ {10, 0.1577324383928644, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 82 */ {10, 0.1572932473495469, CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f)},
+ /* 83 */ {10, 0.1568617748594410, CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47)},
+ /* 84 */ {10, 0.1564377883420716, CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a)},
+ /* 85 */ {9, 0.1560210650222250, CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4)},
+ /* 86 */ {9, 0.1556113914024940, CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e)},
+ /* 87 */ {9, 0.1552085627701551, CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89)},
+ /* 88 */ {9, 0.1548123827357682, CNST_LIMB(0x4645b6958000000), CNST_LIMB(0xd24cde0463108cfa)},
+ /* 89 */ {9, 0.1544226628011101, CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383)},
+ /* 90 */ {9, 0.1540392219542636, CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10)},
+ /* 91 */ {9, 0.1536618862898642, CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43)},
+ /* 92 */ {9, 0.1532904886526781, CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447)},
+ /* 93 */ {9, 0.1529248683028321, CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0)},
+ /* 94 */ {9, 0.1525648706011593, CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4)},
+ /* 95 */ {9, 0.1522103467132434, CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f)},
+ /* 96 */ {9, 0.1518611533308632, CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794)},
+ /* 97 */ {9, 0.1515171524096389, CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc)},
+ /* 98 */ {9, 0.1511782109217764, CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3)},
+ /* 99 */ {9, 0.1508442006228941, CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518)},
+ /* 100 */ {9, 0.1505149978319906, CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e)},
+ /* 101 */ {9, 0.1501904832236879, CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f)},
+ /* 102 */ {9, 0.1498705416319474, CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1)},
+ /* 103 */ {9, 0.1495550618645152, CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328)},
+ /* 104 */ {9, 0.1492439365274121, CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c)},
+ /* 105 */ {9, 0.1489370618588283, CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179)},
+ /* 106 */ {9, 0.1486343375718350, CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf)},
+ /* 107 */ {9, 0.1483356667053617, CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32)},
+ /* 108 */ {9, 0.1480409554829326, CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188)},
+ /* 109 */ {9, 0.1477501131786861, CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c)},
+ /* 110 */ {9, 0.1474630519902391, CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f)},
+ /* 111 */ {9, 0.1471796869179852, CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0)},
+ /* 112 */ {9, 0.1468999356504447, CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8)},
+ /* 113 */ {9, 0.1466237184553111, CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82)},
+ /* 114 */ {9, 0.1463509580758620, CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a)},
+ /* 115 */ {9, 0.1460815796324244, CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5)},
+ /* 116 */ {9, 0.1458155105286054, CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610)},
+ /* 117 */ {9, 0.1455526803620167, CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c)},
+ /* 118 */ {9, 0.1452930208392428, CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977)},
+ /* 119 */ {9, 0.1450364656948130, CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e)},
+ /* 120 */ {9, 0.1447829506139581, CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5)},
+ /* 121 */ {9, 0.1445324131589439, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
+ /* 122 */ {9, 0.1442847926987864, CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5)},
+ /* 123 */ {9, 0.1440400303421672, CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b)},
+ /* 124 */ {9, 0.1437980688733775, CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10)},
+ /* 125 */ {9, 0.1435588526911310, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
+ /* 126 */ {9, 0.1433223277500932, CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8)},
+ /* 127 */ {9, 0.1430884415049874, CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1)},
+ /* 128 */ {9, 0.1428571428571428, CNST_LIMB(0x7), CNST_LIMB(0x0)},
+ /* 129 */ {9, 0.1426283821033600, CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d)},
+ /* 130 */ {9, 0.1424021108869747, CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0)},
+ /* 131 */ {9, 0.1421782821510107, CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085)},
+ /* 132 */ {9, 0.1419568500933153, CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d)},
+ /* 133 */ {9, 0.1417377701235801, CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25)},
+ /* 134 */ {9, 0.1415209988221527, CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d)},
+ /* 135 */ {9, 0.1413064939005528, CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914)},
+ /* 136 */ {9, 0.1410942141636095, CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849)},
+ /* 137 */ {9, 0.1408841194731412, CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9)},
+ /* 138 */ {9, 0.1406761707131039, CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0)},
+ /* 139 */ {8, 0.1404703297561400, CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf)},
+ /* 140 */ {8, 0.1402665594314587, CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05)},
+ /* 141 */ {8, 0.1400648234939879, CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19)},
+ /* 142 */ {8, 0.1398650865947379, CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe)},
+ /* 143 */ {8, 0.1396673142523192, CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7)},
+ /* 144 */ {8, 0.1394714728255649, CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757)},
+ /* 145 */ {8, 0.1392775294872041, CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae)},
+ /* 146 */ {8, 0.1390854521985406, CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40)},
+ /* 147 */ {8, 0.1388952096850913, CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f)},
+ /* 148 */ {8, 0.1387067714131417, CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d)},
+ /* 149 */ {8, 0.1385201075671774, CNST_LIMB(0x35f14200a827c61), CNST_LIMB(0x2fbb374806ec05f1)},
+ /* 150 */ {8, 0.1383351890281539, CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe)},
+ /* 151 */ {8, 0.1381519873525671, CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e)},
+ /* 152 */ {8, 0.1379704747522905, CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc)},
+ /* 153 */ {8, 0.1377906240751463, CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910)},
+ /* 154 */ {8, 0.1376124087861776, CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614)},
+ /* 155 */ {8, 0.1374358029495937, CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2)},
+ /* 156 */ {8, 0.1372607812113589, CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d)},
+ /* 157 */ {8, 0.1370873187823978, CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782)},
+ /* 158 */ {8, 0.1369153914223921, CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b)},
+ /* 159 */ {8, 0.1367449754241439, CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd)},
+ /* 160 */ {8, 0.1365760475984821, CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9)},
+ /* 161 */ {8, 0.1364085852596902, CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753)},
+ /* 162 */ {8, 0.1362425662114337, CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7)},
+ /* 163 */ {8, 0.1360779687331669, CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb)},
+ /* 164 */ {8, 0.1359147715670014, CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac)},
+ /* 165 */ {8, 0.1357529539050150, CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e)},
+ /* 166 */ {8, 0.1355924953769863, CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004)},
+ /* 167 */ {8, 0.1354333760385373, CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8)},
+ /* 168 */ {8, 0.1352755763596663, CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5)},
+ /* 169 */ {8, 0.1351190772136599, CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe)},
+ /* 170 */ {8, 0.1349638598663645, CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532)},
+ /* 171 */ {8, 0.1348099059658079, CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02)},
+ /* 172 */ {8, 0.1346571975321549, CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317)},
+ /* 173 */ {8, 0.1345057169479844, CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c)},
+ /* 174 */ {8, 0.1343554469488779, CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e)},
+ /* 175 */ {8, 0.1342063706143054, CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286)},
+ /* 176 */ {8, 0.1340584713587980, CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb)},
+ /* 177 */ {8, 0.1339117329233981, CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b)},
+ /* 178 */ {8, 0.1337661393673756, CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1)},
+ /* 179 */ {8, 0.1336216750601996, CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef)},
+ /* 180 */ {8, 0.1334783246737591, CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3)},
+ /* 181 */ {8, 0.1333360731748201, CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb)},
+ /* 182 */ {8, 0.1331949058177136, CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8)},
+ /* 183 */ {8, 0.1330548081372441, CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005)},
+ /* 184 */ {8, 0.1329157659418126, CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926)},
+ /* 185 */ {8, 0.1327777653067443, CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f)},
+ /* 186 */ {8, 0.1326407925678156, CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6)},
+ /* 187 */ {8, 0.1325048343149731, CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497)},
+ /* 188 */ {8, 0.1323698773862368, CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67)},
+ /* 189 */ {8, 0.1322359088617821, CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b)},
+ /* 190 */ {8, 0.1321029160581950, CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56)},
+ /* 191 */ {8, 0.1319708865228925, CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0)},
+ /* 192 */ {8, 0.1318398080287045, CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf)},
+ /* 193 */ {8, 0.1317096685686114, CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a)},
+ /* 194 */ {8, 0.1315804563506306, CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962)},
+ /* 195 */ {8, 0.1314521597928493, CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e)},
+ /* 196 */ {8, 0.1313247675185968, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
+ /* 197 */ {8, 0.1311982683517524, CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca)},
+ /* 198 */ {8, 0.1310726513121843, CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2)},
+ /* 199 */ {8, 0.1309479056113158, CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9)},
+ /* 200 */ {8, 0.1308240206478128, CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6)},
+ /* 201 */ {8, 0.1307009860033912, CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22)},
+ /* 202 */ {8, 0.1305787914387386, CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20)},
+ /* 203 */ {8, 0.1304574268895465, CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf)},
+ /* 204 */ {8, 0.1303368824626505, CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9)},
+ /* 205 */ {8, 0.1302171484322746, CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff)},
+ /* 206 */ {8, 0.1300982152363760, CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a)},
+ /* 207 */ {8, 0.1299800734730872, CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd)},
+ /* 208 */ {8, 0.1298627138972530, CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985)},
+ /* 209 */ {8, 0.1297461274170591, CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7)},
+ /* 210 */ {8, 0.1296303050907487, CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175)},
+ /* 211 */ {8, 0.1295152381234257, CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e)},
+ /* 212 */ {8, 0.1294009178639407, CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef)},
+ /* 213 */ {8, 0.1292873358018581, CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9)},
+ /* 214 */ {8, 0.1291744835645007, CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc)},
+ /* 215 */ {8, 0.1290623529140715, CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b)},
+ /* 216 */ {8, 0.1289509357448472, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+ /* 217 */ {8, 0.1288402240804449, CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2)},
+ /* 218 */ {8, 0.1287302100711567, CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049)},
+ /* 219 */ {8, 0.1286208859913518, CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada)},
+ /* 220 */ {8, 0.1285122442369443, CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933)},
+ /* 221 */ {8, 0.1284042773229231, CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d)},
+ /* 222 */ {8, 0.1282969778809442, CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b)},
+ /* 223 */ {8, 0.1281903386569819, CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4)},
+ /* 224 */ {8, 0.1280843525090381, CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053)},
+ /* 225 */ {8, 0.1279790124049077, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
+ /* 226 */ {8, 0.1278743114199984, CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9)},
+ /* 227 */ {8, 0.1277702427352035, CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e)},
+ /* 228 */ {8, 0.1276667996348261, CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b)},
+ /* 229 */ {8, 0.1275639755045533, CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1)},
+ /* 230 */ {8, 0.1274617638294791, CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762)},
+ /* 231 */ {8, 0.1273601581921741, CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555)},
+ /* 232 */ {8, 0.1272591522708010, CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe)},
+ /* 233 */ {8, 0.1271587398372755, CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f)},
+ /* 234 */ {8, 0.1270589147554692, CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3)},
+ /* 235 */ {8, 0.1269596709794558, CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c)},
+ /* 236 */ {8, 0.1268610025517973, CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773)},
+ /* 237 */ {8, 0.1267629036018709, CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5)},
+ /* 238 */ {8, 0.1266653683442337, CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f)},
+ /* 239 */ {8, 0.1265683910770258, CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c)},
+ /* 240 */ {8, 0.1264719661804097, CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529)},
+ /* 241 */ {8, 0.1263760881150453, CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1)},
+ /* 242 */ {8, 0.1262807514205999, CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2)},
+ /* 243 */ {8, 0.1261859507142915, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 244 */ {8, 0.1260916806894653, CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6)},
+ /* 245 */ {8, 0.1259979361142023, CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2)},
+ /* 246 */ {8, 0.1259047118299582, CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47)},
+ /* 247 */ {8, 0.1258120027502338, CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016)},
+ /* 248 */ {8, 0.1257198038592741, CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18)},
+ /* 249 */ {8, 0.1256281102107963, CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d)},
+ /* 250 */ {8, 0.1255369169267456, CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5)},
+ /* 251 */ {8, 0.1254462191960791, CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3)},
+ /* 252 */ {8, 0.1253560122735751, CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d)},
+ /* 253 */ {8, 0.1252662914786691, CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac)},
+ /* 254 */ {8, 0.1251770521943144, CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93)},
+ /* 255 */ {8, 0.1250882898658681, CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f)},
+};
+#endif
diff --git a/rts/gmp/mpn/ns32k/add_n.s b/rts/gmp/mpn/ns32k/add_n.s
new file mode 100644
index 0000000000..bd063d07d9
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/add_n.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ save [r3,r4,r5]
+ negd 28(sp),r3
+ movd r3,r0
+ lshd 2,r0
+ movd 24(sp),r4
+ subd r0,r4 # r4 -> to end of S2
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r2
+ subd r0,r2 # r2 -> to end of RES
+ subd r0,r0 # cy = 0
+
+Loop: movd r5[r3:d],r0
+ addcd r4[r3:d],r0
+ movd r0,r2[r3:d]
+ acbd 1,r3,Loop
+
+ scsd r0 # r0 = cy.
+ restore [r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/addmul_1.s b/rts/gmp/mpn/ns32k/addmul_1.s
new file mode 100644
index 0000000000..df0dcdd4af
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/addmul_1.s
@@ -0,0 +1,48 @@
+# ns32000 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+ save [r3,r4,r5,r6,r7]
+ negd 24(sp),r4
+ movd r4,r0
+ lshd 2,r0
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r6
+ subd r0,r6 # r6 -> to end of RES
+ subd r0,r0 # r0 = 0, cy = 0
+ movd 28(sp),r7 # r7 = s2_limb
+
+Loop: movd r5[r4:d],r2
+ meid r7,r2 # r2 = low_prod, r3 = high_prod
+ addcd r0,r2 # r2 = low_prod + cy_limb
+ movd r3,r0 # r0 = new cy_limb
+ addcd 0,r0
+ addd r2,r6[r4:d]
+ acbd 1,r4,Loop
+
+ addcd 0,r0
+ restore [r7,r6,r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/mul_1.s b/rts/gmp/mpn/ns32k/mul_1.s
new file mode 100644
index 0000000000..0a77efba29
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/mul_1.s
@@ -0,0 +1,47 @@
+# ns32000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ save [r3,r4,r5,r6,r7]
+ negd 24(sp),r4
+ movd r4,r0
+ lshd 2,r0
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r6
+ subd r0,r6 # r6 -> to end of RES
+ subd r0,r0 # r0 = 0, cy = 0
+ movd 28(sp),r7 # r7 = s2_limb
+
+Loop: movd r5[r4:d],r2
+ meid r7,r2 # r2 = low_prod, r3 = high_prod
+ addcd r0,r2 # r2 = low_prod + cy_limb
+ movd r3,r0 # r0 = new cy_limb
+ movd r2,r6[r4:d]
+ acbd 1,r4,Loop
+
+ addcd 0,r0
+ restore [r7,r6,r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/sub_n.s b/rts/gmp/mpn/ns32k/sub_n.s
new file mode 100644
index 0000000000..cd89f4fd3f
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/sub_n.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ save [r3,r4,r5]
+ negd 28(sp),r3
+ movd r3,r0
+ lshd 2,r0
+ movd 24(sp),r4
+ subd r0,r4 # r4 -> to end of S2
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r2
+ subd r0,r2 # r2 -> to end of RES
+ subd r0,r0 # cy = 0
+
+Loop: movd r5[r3:d],r0
+ subcd r4[r3:d],r0
+ movd r0,r2[r3:d]
+ acbd 1,r3,Loop
+
+ scsd r0 # r0 = cy.
+ restore [r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/submul_1.s b/rts/gmp/mpn/ns32k/submul_1.s
new file mode 100644
index 0000000000..f811aedcf1
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/submul_1.s
@@ -0,0 +1,48 @@
+# ns32000 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+ save [r3,r4,r5,r6,r7]
+ negd 24(sp),r4
+ movd r4,r0
+ lshd 2,r0
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r6
+ subd r0,r6 # r6 -> to end of RES
+ subd r0,r0 # r0 = 0, cy = 0
+ movd 28(sp),r7 # r7 = s2_limb
+
+Loop: movd r5[r4:d],r2
+ meid r7,r2 # r2 = low_prod, r3 = high_prod
+ addcd r0,r2 # r2 = low_prod + cy_limb
+ movd r3,r0 # r0 = new cy_limb
+ addcd 0,r0
+ subd r2,r6[r4:d]
+ acbd 1,r4,Loop
+
+ addcd 0,r0
+ restore [r7,r6,r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/pa64/README b/rts/gmp/mpn/pa64/README
new file mode 100644
index 0000000000..8d2976dabc
--- /dev/null
+++ b/rts/gmp/mpn/pa64/README
@@ -0,0 +1,38 @@
+This directory contains mpn functions for 64-bit PA-RISC 2.0.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The PA8000 has a multi-issue pipeline with large buffers for instructions
+awaiting pending results. Therefore, no latency scheduling is necessary
+(and might actually be harmful).
+
+Two 64-bit loads can be completed per cycle. One 64-bit store can be
+completed per cycle. A store cannot complete in the same cycle as a load.
+
+STATUS
+
+* mpn_lshift, mpn_rshift, mpn_add_n, mpn_sub_n are all well-tuned and run at
+ the peak cache bandwidth; 1.5 cycles/limb for shifting and 2.0 cycles/limb
+ for add/subtract.
+
+* The multiplication functions run at 11 cycles/limb. The cache bandwidth
+ allows 7.5 cycles/limb. Perhaps it would be possible, using unrolling or
+ better scheduling, to get closer to the cache bandwidth limit.
+
+* xaddmul_1.S contains a quicker method for forming the 128 bit product. It
+ uses some fewer operations, and keep the carry flag live across the loop
+ boundary. But it seems hard to make it run more than 1/4 cycle faster
+ than the old code. Perhaps we really ought to unroll this loop be 2x?
+ 2x should suffice since register latency schedling is never needed,
+ but the unrolling would hide the store-load latency. Here is a sketch:
+
+ 1. A multiply and store 64-bit products
+ 2. B sum 64-bit products 128-bit product
+ 3. B load 64-bit products to integer registers
+ 4. B multiply and store 64-bit products
+ 5. A sum 64-bit products 128-bit product
+ 6. A load 64-bit products to integer registers
+ 7. goto 1
+
+ In practice, adjacent groups (1 and 2, 2 and 3, etc) will be interleaved
+ for better instruction mix.
diff --git a/rts/gmp/mpn/pa64/add_n.s b/rts/gmp/mpn/pa64/add_n.s
new file mode 100644
index 0000000000..22ff19c184
--- /dev/null
+++ b/rts/gmp/mpn/pa64/add_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+; store sum in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_add_n,entry
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ bve (%r2)
+ .exit
+ ldi 0,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/addmul_1.S b/rts/gmp/mpn/pa64/addmul_1.S
new file mode 100644
index 0000000000..b1885b432c
--- /dev/null
+++ b/rts/gmp/mpn/pa64/addmul_1.S
@@ -0,0 +1,167 @@
+; HP-PA 2.0 64-bit __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0n
+ .code
+ .export __gmpn_addmul_1,entry
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ fldd -56(%r30),%fr5 ; s2limb passed on stack
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,rlimb,rlimb
+ add,dc t2,hi,cylimb
+ add t4,rlimb,t3
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ extrd,u cylimb,31,32,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64/gmp-mparam.h b/rts/gmp/mpn/pa64/gmp-mparam.h
new file mode 100644
index 0000000000..847735b987
--- /dev/null
+++ b/rts/gmp/mpn/pa64/gmp-mparam.h
@@ -0,0 +1,65 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values were measured in a PA8000 using the system compiler version
+ A.10.32.30. Presumably the PA8200 and PA8500 have the same timing
+ characteristic, but GCC might give somewhat different results. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 16
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 105
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 40
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 116
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 72
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 94
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 50
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 1
+#endif
diff --git a/rts/gmp/mpn/pa64/lshift.s b/rts/gmp/mpn/pa64/lshift.s
new file mode 100644
index 0000000000..994bc1c4d6
--- /dev/null
+++ b/rts/gmp/mpn/pa64/lshift.s
@@ -0,0 +1,103 @@
+; HP-PA 2.0 __gmpn_lshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_lshift,entry
+__gmpn_lshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ shladd %r24,3,%r25,%r25
+ shladd %r24,3,%r26,%r26
+ subi 64,%r23,%r23
+ mtsar %r23
+ ldd -8(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r0,%r21,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ add %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ add %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd -16(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-8(%r26)
+L$7 ldd -24(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-16(%r26)
+L$6 ldd -32(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-24(%r26)
+L$5 ldd -40(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-32(%r26)
+L$4 ldd -48(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-40(%r26)
+L$3 ldd -56(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-48(%r26)
+L$2 ldd -64(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-56(%r26)
+L$1 ldd -72(%r25),%r21
+ ldo -64(%r25),%r25
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-64(%r26)
+ addib,> -8,%r24,L$loop
+ ldo -64(%r26),%r26
+
+L$end shrpd %r21,%r0,%sar,%r21
+ std %r21,-8(%r26)
+ bve (%r2)
+ .exit
+ extrd,u %r29,31,32,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/mul_1.S b/rts/gmp/mpn/pa64/mul_1.S
new file mode 100644
index 0000000000..ab310c1264
--- /dev/null
+++ b/rts/gmp/mpn/pa64/mul_1.S
@@ -0,0 +1,158 @@
+; HP-PA 2.0 64-bit __gmpn_mul_1 -- Multiply a limb vector with a limb and
+; store the result in a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0n
+ .code
+ .export __gmpn_mul_1,entry
+__gmpn_mul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ fldd -56(%r30),%fr5 ; s2limb passed on stack
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t2 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t3
+ add,dc t2,hi,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ extrd,u cylimb,31,32,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64/rshift.s b/rts/gmp/mpn/pa64/rshift.s
new file mode 100644
index 0000000000..f0730e2a91
--- /dev/null
+++ b/rts/gmp/mpn/pa64/rshift.s
@@ -0,0 +1,100 @@
+; HP-PA 2.0 __gmpn_rshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_rshift,entry
+__gmpn_rshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ mtsar %r23
+ ldd 0(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r21,%r0,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ sub %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd 8(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,0(%r26)
+L$7 ldd 16(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,8(%r26)
+L$6 ldd 24(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,16(%r26)
+L$5 ldd 32(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,24(%r26)
+L$4 ldd 40(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,32(%r26)
+L$3 ldd 48(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,40(%r26)
+L$2 ldd 56(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,48(%r26)
+L$1 ldd 64(%r25),%r21
+ ldo 64(%r25),%r25
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,56(%r26)
+ addib,> -8,%r24,L$loop
+ ldo 64(%r26),%r26
+
+L$end shrpd %r0,%r21,%sar,%r21
+ std %r21,0(%r26)
+ bve (%r2)
+ .exit
+ extrd,u %r29,31,32,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/sub_n.s b/rts/gmp/mpn/pa64/sub_n.s
new file mode 100644
index 0000000000..dda1f54b34
--- /dev/null
+++ b/rts/gmp/mpn/pa64/sub_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+; and store difference in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_sub_n,entry
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ subi 1,%r29,%r29
+ bve (%r2)
+ .exit
+ ldi 0,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/submul_1.S b/rts/gmp/mpn/pa64/submul_1.S
new file mode 100644
index 0000000000..27666b99df
--- /dev/null
+++ b/rts/gmp/mpn/pa64/submul_1.S
@@ -0,0 +1,170 @@
+; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0n
+ .code
+ .export __gmpn_submul_1,entry
+__gmpn_submul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ fldd -56(%r30),%fr5 ; s2limb passed on stack
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t4
+ add,dc t2,hi,cylimb
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ extrd,u cylimb,31,32,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64/udiv_qrnnd.c b/rts/gmp/mpn/pa64/udiv_qrnnd.c
new file mode 100644
index 0000000000..1c9fe084db
--- /dev/null
+++ b/rts/gmp/mpn/pa64/udiv_qrnnd.c
@@ -0,0 +1,111 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define TWO64 18446744073709551616.0
+
+mp_limb_t
+#if __STDC__
+__MPN(udiv_qrnnd) (mp_limb_t n1, mp_limb_t n0, mp_limb_t d, mp_limb_t *r)
+#else
+__MPN(udiv_qrnnd) (n1, n0, d, r)
+ mp_limb_t n1;
+ mp_limb_t n0;
+ mp_limb_t d;
+ mp_limb_t *r;
+#endif
+{
+ mp_limb_t q1, q2, q;
+ mp_limb_t p1, p0;
+ double di, dq;
+
+ di = 1.0 / d;
+
+ /* Generate upper 53 bits of quotient. Be careful here; the `double'
+ quotient may be rounded to 2^64 which we cannot safely convert back
+ to a 64-bit integer. */
+ dq = (TWO64 * (double) n1 + (double) n0) * di;
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800LL;
+ else
+ q1 = (mp_limb_t) dq;
+
+ /* Multiply back in order to compare the product to the dividend. */
+ umul_ppmm (p1, p0, q1, d);
+
+ /* Was the 53-bit quotient greater that our sought quotient? Test the
+ sign of the partial remainder to find out. */
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ /* 53-bit quotient too large. Partial remainder is negative.
+ Compute the absolute value of the remainder in n1,,n0. */
+ n1 = p1 - (n1 + (p0 < n0));
+ n0 = p0 - n0;
+
+ /* Now use the partial remainder as new dividend to compute more bits of
+ quotient. This is an adjustment for the one we got previously. */
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 - q2;
+ if (n1 < p1 || (n1 == p1 && n0 <= p0))
+ {
+ n0 = p0 - n0;
+ }
+ else
+ {
+ n0 = p0 - n0;
+ n0 += d;
+ q--;
+ }
+ }
+ else
+ {
+ n1 = n1 - (p1 + (n0 < p0));
+ n0 = n0 - p0;
+
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 + q2;
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ n0 = n0 - p0;
+ n0 += d;
+ q--;
+ }
+ else
+ {
+ n0 = n0 - p0;
+ if (n0 >= d)
+ {
+ n0 -= d;
+ q++;
+ }
+ }
+ }
+
+ *r = n0;
+ return q;
+}
diff --git a/rts/gmp/mpn/pa64/umul_ppmm.S b/rts/gmp/mpn/pa64/umul_ppmm.S
new file mode 100644
index 0000000000..ceff2d752f
--- /dev/null
+++ b/rts/gmp/mpn/pa64/umul_ppmm.S
@@ -0,0 +1,74 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+#define p0 %r28
+#define p1 %r29
+#define t32 %r19
+#define t0 %r20
+#define t1 %r21
+#define x %r22
+#define m0 %r23
+#define m1 %r24
+ .level 2.0n
+ .code
+ .export __gmpn_umul_ppmm,entry
+__gmpn_umul_ppmm
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ ldo 128(%r30),%r30
+ depd %r25,31,32,%r26
+ std %r26,-64(%r30)
+ depd %r23,31,32,%r24
+ std %r24,-56(%r30)
+
+ ldw -180(%r30),%r31
+
+ fldd -64(%r30),%fr4
+ fldd -56(%r30),%fr5
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+
+ depdi,z 1,31,1,t32 ; t32 = 2^32
+
+ ldd -128(%r30),p0 ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),p1 ; hi = high 64 bit of product
+
+ add,l,*nuv m0,m1,x ; x = m1+m0
+ add,l t32,p1,p1 ; propagate carry to mid of p1
+ depd,z x,31,32,t0 ; lo32(m1+m0)
+ add t0,p0,p0
+ extrd,u x,31,32,t1 ; hi32(m1+m0)
+ add,dc t1,p1,p1
+
+ std p0,0(%r31) ; store low half of product
+ extrd,u p1,31,32,%r28 ; return high half of product
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64w/README b/rts/gmp/mpn/pa64w/README
new file mode 100644
index 0000000000..cf590a7b98
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/README
@@ -0,0 +1,2 @@
+This directory contains mpn functions for 64-bit PA-RISC 2.0
+using 64-bit pointers (2.0W).
diff --git a/rts/gmp/mpn/pa64w/add_n.s b/rts/gmp/mpn/pa64w/add_n.s
new file mode 100644
index 0000000000..1bb9e8fbc7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/add_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+; store sum in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0w
+ .code
+ .export __gmpn_add_n,entry
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ bve (%r2)
+ .exit
+ copy %r29,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64w/addmul_1.S b/rts/gmp/mpn/pa64w/addmul_1.S
new file mode 100644
index 0000000000..4799f90fc5
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/addmul_1.S
@@ -0,0 +1,168 @@
+; HP-PA 2.0 64-bit __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0w
+ .code
+ .export __gmpn_addmul_1,entry
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ std s2limb,-56(%r30)
+ fldd -56(%r30),%fr5
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,rlimb,rlimb
+ add,dc t2,hi,cylimb
+ add t4,rlimb,t3
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ copy cylimb,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64w/gmp-mparam.h b/rts/gmp/mpn/pa64w/gmp-mparam.h
new file mode 100644
index 0000000000..ee5a0a3ab7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/gmp-mparam.h
@@ -0,0 +1,65 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values were measured on a PA8500 using the system compiler version
+ A.11.01.02. Presumably the PA8000 and PA8200 have the same timing
+ characteristic, but GCC might give somewhat different results.. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 18
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 105
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 46
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 83
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 58
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 134
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 56
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 26
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 1
+#endif
diff --git a/rts/gmp/mpn/pa64w/lshift.s b/rts/gmp/mpn/pa64w/lshift.s
new file mode 100644
index 0000000000..84f925a105
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/lshift.s
@@ -0,0 +1,103 @@
+; HP-PA 2.0 __gmpn_lshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0w
+ .code
+ .export __gmpn_lshift,entry
+__gmpn_lshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ shladd %r24,3,%r25,%r25
+ shladd %r24,3,%r26,%r26
+ subi 64,%r23,%r23
+ mtsar %r23
+ ldd -8(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r0,%r21,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ add %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ add %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd -16(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-8(%r26)
+L$7 ldd -24(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-16(%r26)
+L$6 ldd -32(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-24(%r26)
+L$5 ldd -40(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-32(%r26)
+L$4 ldd -48(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-40(%r26)
+L$3 ldd -56(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-48(%r26)
+L$2 ldd -64(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-56(%r26)
+L$1 ldd -72(%r25),%r21
+ ldo -64(%r25),%r25
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-64(%r26)
+ addib,> -8,%r24,L$loop
+ ldo -64(%r26),%r26
+
+L$end shrpd %r21,%r0,%sar,%r21
+ std %r21,-8(%r26)
+ bve (%r2)
+ .exit
+ copy %r29,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64w/mul_1.S b/rts/gmp/mpn/pa64w/mul_1.S
new file mode 100644
index 0000000000..48f13fbd1b
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/mul_1.S
@@ -0,0 +1,159 @@
+; HP-PA 2.0 64-bit __gmpn_mul_1 -- Multiply a limb vector with a limb and
+; store the result in a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0w
+ .code
+ .export __gmpn_mul_1,entry
+__gmpn_mul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ std s2limb,-56(%r30)
+ fldd -56(%r30),%fr5
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t2 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t3
+ add,dc t2,hi,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ copy cylimb,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64w/rshift.s b/rts/gmp/mpn/pa64w/rshift.s
new file mode 100644
index 0000000000..2517cb1f87
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/rshift.s
@@ -0,0 +1,100 @@
+; HP-PA 2.0 __gmpn_rshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0w
+ .code
+ .export __gmpn_rshift,entry
+__gmpn_rshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ mtsar %r23
+ ldd 0(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r21,%r0,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ sub %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd 8(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,0(%r26)
+L$7 ldd 16(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,8(%r26)
+L$6 ldd 24(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,16(%r26)
+L$5 ldd 32(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,24(%r26)
+L$4 ldd 40(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,32(%r26)
+L$3 ldd 48(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,40(%r26)
+L$2 ldd 56(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,48(%r26)
+L$1 ldd 64(%r25),%r21
+ ldo 64(%r25),%r25
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,56(%r26)
+ addib,> -8,%r24,L$loop
+ ldo 64(%r26),%r26
+
+L$end shrpd %r0,%r21,%sar,%r21
+ std %r21,0(%r26)
+ bve (%r2)
+ .exit
+ copy %r29,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64w/sub_n.s b/rts/gmp/mpn/pa64w/sub_n.s
new file mode 100644
index 0000000000..ad01e24aa7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/sub_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+; and store difference in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0w
+ .code
+ .export __gmpn_sub_n,entry
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ subi 1,%r29,%r29
+ bve (%r2)
+ .exit
+ copy %r29,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64w/submul_1.S b/rts/gmp/mpn/pa64w/submul_1.S
new file mode 100644
index 0000000000..294f6239b2
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/submul_1.S
@@ -0,0 +1,171 @@
+; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0w
+ .code
+ .export __gmpn_submul_1,entry
+__gmpn_submul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ std s2limb,-56(%r30)
+ fldd -56(%r30),%fr5
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t4
+ add,dc t2,hi,cylimb
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ copy cylimb,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64w/udiv_qrnnd.c b/rts/gmp/mpn/pa64w/udiv_qrnnd.c
new file mode 100644
index 0000000000..1852913000
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/udiv_qrnnd.c
@@ -0,0 +1,117 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define TWO64 18446744073709551616.0
+#define TWO63 9223372036854775808.0
+
+mp_limb_t
+#if __STDC__
+__MPN(udiv_qrnnd) (mp_limb_t n1, mp_limb_t n0, mp_limb_t d, mp_limb_t *r)
+#else
+__MPN(udiv_qrnnd) (n1, n0, d, r)
+ mp_limb_t n1;
+ mp_limb_t n0;
+ mp_limb_t d;
+ mp_limb_t *r;
+#endif
+{
+ mp_limb_t q1, q2, q;
+ mp_limb_t p1, p0;
+ double di, dq;
+
+ di = 1.0 / d;
+
+ /* Generate upper 53 bits of quotient. Be careful here; the `double'
+ quotient may be rounded to 2^64 which we cannot safely convert back
+ to a 64-bit integer. */
+ dq = (TWO64 * (double) n1 + (double) n0) * di;
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800L;
+#ifndef __GNUC__
+ /* Work around HP compiler bug. */
+ else if (dq > TWO63)
+ q1 = (mp_limb_t) (dq - TWO63) + 0x8000000000000000L;
+#endif
+ else
+ q1 = (mp_limb_t) dq;
+
+ /* Multiply back in order to compare the product to the dividend. */
+ umul_ppmm (p1, p0, q1, d);
+
+ /* Was the 53-bit quotient greater that our sought quotient? Test the
+ sign of the partial remainder to find out. */
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ /* 53-bit quotient too large. Partial remainder is negative.
+ Compute the absolute value of the remainder in n1,,n0. */
+ n1 = p1 - (n1 + (p0 < n0));
+ n0 = p0 - n0;
+
+ /* Now use the partial remainder as new dividend to compute more bits of
+ quotient. This is an adjustment for the one we got previously. */
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 - q2;
+ if (n1 < p1 || (n1 == p1 && n0 <= p0))
+ {
+ n0 = p0 - n0;
+ }
+ else
+ {
+ n0 = p0 - n0;
+ n0 += d;
+ q--;
+ }
+ }
+ else
+ {
+ n1 = n1 - (p1 + (n0 < p0));
+ n0 = n0 - p0;
+
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 + q2;
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ n0 = n0 - p0;
+ n0 += d;
+ q--;
+ }
+ else
+ {
+ n0 = n0 - p0;
+ if (n0 >= d)
+ {
+ n0 -= d;
+ q++;
+ }
+ }
+ }
+
+ *r = n0;
+ return q;
+}
diff --git a/rts/gmp/mpn/pa64w/umul_ppmm.S b/rts/gmp/mpn/pa64w/umul_ppmm.S
new file mode 100644
index 0000000000..d9fb92be8c
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/umul_ppmm.S
@@ -0,0 +1,72 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+#define p0 %r28
+#define p1 %r29
+#define t32 %r19
+#define t0 %r20
+#define t1 %r21
+#define x %r22
+#define m0 %r23
+#define m1 %r24
+ .level 2.0w
+ .code
+ .export __gmpn_umul_ppmm,entry
+__gmpn_umul_ppmm
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ ldo 128(%r30),%r30
+ std %r26,-64(%r30)
+ std %r25,-56(%r30)
+
+ copy %r24,%r31
+
+ fldd -64(%r30),%fr4
+ fldd -56(%r30),%fr5
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+
+ depdi,z 1,31,1,t32 ; t32 = 2^32
+
+ ldd -128(%r30),p0 ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),p1 ; hi = high 64 bit of product
+
+ add,l,*nuv m0,m1,x ; x = m1+m0
+ add,l t32,p1,p1 ; propagate carry to mid of p1
+ depd,z x,31,32,t0 ; lo32(m1+m0)
+ add t0,p0,p0
+ extrd,u x,31,32,t1 ; hi32(m1+m0)
+ add,dc t1,p1,p1
+
+ std p0,0(%r31) ; store low half of product
+ copy p1,%r28 ; return high half of product
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/power/add_n.s b/rts/gmp/mpn/power/add_n.s
new file mode 100644
index 0000000000..0f9f48f1cc
--- /dev/null
+++ b/rts/gmp/mpn/power/add_n.s
@@ -0,0 +1,79 @@
+# IBM POWER __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+# Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+ .toc
+ .globl __gmpn_add_n
+ .globl .__gmpn_add_n
+ .csect __gmpn_add_n[DS]
+__gmpn_add_n:
+ .long .__gmpn_add_n, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_add_n:
+ andil. 10,6,1 # odd or even number of limbs?
+ l 8,0(4) # load least significant s1 limb
+ l 0,0(5) # load least significant s2 limb
+ cal 3,-4(3) # offset res_ptr, it's updated before it's used
+ sri 10,6,1 # count for unrolled loop
+ a 7,0,8 # add least significant limbs, set cy
+ mtctr 10 # copy count into CTR
+ beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs. Add the first limbs separately.
+ cmpi 1,10,0 # is count for unrolled loop zero?
+ bc 4,6,L1 # bne cr1,L1 (misassembled by gas)
+ st 7,4(3)
+ aze 3,10 # use the fact that r10 is zero...
+ br # return
+
+# We added least significant limbs. Now reload the next limbs to enter loop.
+L1: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ stu 7,4(3)
+ ae 7,0,8 # add limbs, set cy
+Leven: lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ bdz Lend # If done, skip loop
+
+Loop: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ ae 11,9,10 # add previous limbs with cy, set cy
+ stu 7,4(3) #
+ lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ ae 7,0,8 # add previous limbs with cy, set cy
+ stu 11,4(3) #
+ bdn Loop # decrement CTR and loop back
+
+Lend: ae 11,9,10 # add limbs with cy, set cy
+ st 7,4(3) #
+ st 11,8(3) #
+ lil 3,0 # load cy into ...
+ aze 3,3 # ... return value register
+ br
diff --git a/rts/gmp/mpn/power/addmul_1.s b/rts/gmp/mpn/power/addmul_1.s
new file mode 100644
index 0000000000..8ecc651579
--- /dev/null
+++ b/rts/gmp/mpn/power/addmul_1.s
@@ -0,0 +1,122 @@
+# IBM POWER __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+# The POWER architecture has no unsigned 32x32->64 bit multiplication
+# instruction. To obtain that operation, we have to use the 32x32->64 signed
+# multiplication instruction, and add the appropriate compensation to the high
+# limb of the result. We add the multiplicand if the multiplier has its most
+# significant bit set, and we add the multiplier if the multiplicand has its
+# most significant bit set. We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work). Since the POWER architecture has a branch unit we
+# can branch in zero cycles, so that's how we perform the additions.
+
+ .toc
+ .globl __gmpn_addmul_1
+ .globl .__gmpn_addmul_1
+ .csect __gmpn_addmul_1[DS]
+__gmpn_addmul_1:
+ .long .__gmpn_addmul_1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_addmul_1:
+
+ cal 3,-4(3)
+ l 0,0(4)
+ cmpi 0,6,0
+ mtctr 5
+ mul 9,0,6
+ srai 7,0,31
+ and 7,7,6
+ mfmq 8
+ cax 9,9,7
+ l 7,4(3)
+ a 8,8,7 # add res_limb
+ blt Lneg
+Lpos: bdz Lend
+
+Lploop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 0
+ ae 8,0,9 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ aze 10,10 # propagate cy to new cy_limb
+ a 8,8,7 # add res_limb
+ bge Lp0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Lp0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 0
+ ae 8,0,10
+ l 7,4(3)
+ aze 9,9
+ a 8,8,7
+ bge Lp1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Lp1: bdn Lploop
+
+ b Lend
+
+Lneg: cax 9,9,0
+ bdz Lend
+Lnloop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 7
+ ae 8,7,9
+ l 7,4(3)
+ ae 10,10,0 # propagate cy to new cy_limb
+ a 8,8,7 # add res_limb
+ bge Ln0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Ln0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 7
+ ae 8,7,10
+ l 7,4(3)
+ ae 9,9,0 # propagate cy to new cy_limb
+ a 8,8,7 # add res_limb
+ bge Ln1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Ln1: bdn Lnloop
+ b Lend
+
+Lend0: cal 9,0(10)
+Lend: st 8,4(3)
+ aze 3,9
+ br
diff --git a/rts/gmp/mpn/power/lshift.s b/rts/gmp/mpn/power/lshift.s
new file mode 100644
index 0000000000..ab71fb7727
--- /dev/null
+++ b/rts/gmp/mpn/power/lshift.s
@@ -0,0 +1,56 @@
+# IBM POWER __gmpn_lshift --
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s_ptr r4
+# size r5
+# cnt r6
+
+ .toc
+ .globl __gmpn_lshift
+ .globl .__gmpn_lshift
+ .csect __gmpn_lshift[DS]
+__gmpn_lshift:
+ .long .__gmpn_lshift, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_lshift:
+ sli 0,5,2
+ cax 9,3,0
+ cax 4,4,0
+ sfi 8,6,32
+ mtctr 5 # put limb count in CTR loop register
+ lu 0,-4(4) # read most significant limb
+ sre 3,0,8 # compute carry out limb, and init MQ register
+ bdz Lend2 # if just one limb, skip loop
+ lu 0,-4(4) # read 2:nd most significant limb
+ sreq 7,0,8 # compute most significant limb of result
+ bdz Lend # if just two limb, skip loop
+Loop: lu 0,-4(4) # load next lower limb
+ stu 7,-4(9) # store previous result during read latency
+ sreq 7,0,8 # compute result limb
+ bdn Loop # loop back until CTR is zero
+Lend: stu 7,-4(9) # store 2:nd least significant limb
+Lend2: sle 7,0,6 # compute least significant limb
+ st 7,-4(9) # store it" \
+ br
diff --git a/rts/gmp/mpn/power/mul_1.s b/rts/gmp/mpn/power/mul_1.s
new file mode 100644
index 0000000000..4e08ade583
--- /dev/null
+++ b/rts/gmp/mpn/power/mul_1.s
@@ -0,0 +1,109 @@
+# IBM POWER __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+# The POWER architecture has no unsigned 32x32->64 bit multiplication
+# instruction. To obtain that operation, we have to use the 32x32->64 signed
+# multiplication instruction, and add the appropriate compensation to the high
+# limb of the result. We add the multiplicand if the multiplier has its most
+# significant bit set, and we add the multiplier if the multiplicand has its
+# most significant bit set. We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work). Since the POWER architecture has a branch unit we
+# can branch in zero cycles, so that's how we perform the additions.
+
+ .toc
+ .globl __gmpn_mul_1
+ .globl .__gmpn_mul_1
+ .csect __gmpn_mul_1[DS]
+__gmpn_mul_1:
+ .long .__gmpn_mul_1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_mul_1:
+
+ cal 3,-4(3)
+ l 0,0(4)
+ cmpi 0,6,0
+ mtctr 5
+ mul 9,0,6
+ srai 7,0,31
+ and 7,7,6
+ mfmq 8
+ ai 0,0,0 # reset carry
+ cax 9,9,7
+ blt Lneg
+Lpos: bdz Lend
+Lploop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 0
+ ae 8,0,9
+ bge Lp0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Lp0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 0
+ ae 8,0,10
+ bge Lp1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Lp1: bdn Lploop
+ b Lend
+
+Lneg: cax 9,9,0
+ bdz Lend
+Lnloop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ cax 10,10,0 # adjust high limb for negative s2_limb
+ mfmq 0
+ ae 8,0,9
+ bge Ln0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Ln0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ cax 9,9,0 # adjust high limb for negative s2_limb
+ mfmq 0
+ ae 8,0,10
+ bge Ln1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Ln1: bdn Lnloop
+ b Lend
+
+Lend0: cal 9,0(10)
+Lend: st 8,4(3)
+ aze 3,9
+ br
diff --git a/rts/gmp/mpn/power/rshift.s b/rts/gmp/mpn/power/rshift.s
new file mode 100644
index 0000000000..65b3945f8a
--- /dev/null
+++ b/rts/gmp/mpn/power/rshift.s
@@ -0,0 +1,54 @@
+# IBM POWER __gmpn_rshift --
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s_ptr r4
+# size r5
+# cnt r6
+
+ .toc
+ .globl __gmpn_rshift
+ .globl .__gmpn_rshift
+ .csect __gmpn_rshift[DS]
+__gmpn_rshift:
+ .long .__gmpn_rshift, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_rshift:
+ sfi 8,6,32
+ mtctr 5 # put limb count in CTR loop register
+ l 0,0(4) # read least significant limb
+ ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
+ sle 3,0,8 # compute carry limb, and init MQ register
+ bdz Lend2 # if just one limb, skip loop
+ lu 0,4(4) # read 2:nd least significant limb
+ sleq 7,0,8 # compute least significant limb of result
+ bdz Lend # if just two limb, skip loop
+Loop: lu 0,4(4) # load next higher limb
+ stu 7,4(9) # store previous result during read latency
+ sleq 7,0,8 # compute result limb
+ bdn Loop # loop back until CTR is zero
+Lend: stu 7,4(9) # store 2:nd most significant limb
+Lend2: sre 7,0,6 # compute most significant limb
+ st 7,4(9) # store it" \
+ br
diff --git a/rts/gmp/mpn/power/sdiv.s b/rts/gmp/mpn/power/sdiv.s
new file mode 100644
index 0000000000..81da622fbc
--- /dev/null
+++ b/rts/gmp/mpn/power/sdiv.s
@@ -0,0 +1,34 @@
+# Copyright (C) 1999 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+ .toc
+ .globl __sdiv_qrnnd
+ .globl .__sdiv_qrnnd
+ .csect __sdiv_qrnnd[DS]
+__sdiv_qrnnd:
+ .long .__sdiv_qrnnd, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__sdiv_qrnnd:
+ mtmq 5
+ div 0,4,6
+ mfmq 9
+ st 9,0(3)
+ mr 3,0
+ br
diff --git a/rts/gmp/mpn/power/sub_n.s b/rts/gmp/mpn/power/sub_n.s
new file mode 100644
index 0000000000..aa09cf5bc1
--- /dev/null
+++ b/rts/gmp/mpn/power/sub_n.s
@@ -0,0 +1,80 @@
+# IBM POWER __gmpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+# Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+ .toc
+ .globl __gmpn_sub_n
+ .globl .__gmpn_sub_n
+ .csect __gmpn_sub_n[DS]
+__gmpn_sub_n:
+ .long .__gmpn_sub_n, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_sub_n:
+ andil. 10,6,1 # odd or even number of limbs?
+ l 8,0(4) # load least significant s1 limb
+ l 0,0(5) # load least significant s2 limb
+ cal 3,-4(3) # offset res_ptr, it's updated before it's used
+ sri 10,6,1 # count for unrolled loop
+ sf 7,0,8 # subtract least significant limbs, set cy
+ mtctr 10 # copy count into CTR
+ beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs. Add the first limbs separately.
+ cmpi 1,10,0 # is count for unrolled loop zero?
+ bc 4,6,L1 # bne cr1,L1 (misassembled by gas)
+ st 7,4(3)
+ sfe 3,0,0 # load !cy into ...
+ sfi 3,3,0 # ... return value register
+ br # return
+
+# We added least significant limbs. Now reload the next limbs to enter loop.
+L1: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ stu 7,4(3)
+ sfe 7,0,8 # subtract limbs, set cy
+Leven: lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ bdz Lend # If done, skip loop
+
+Loop: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ sfe 11,10,9 # subtract previous limbs with cy, set cy
+ stu 7,4(3) #
+ lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ sfe 7,0,8 # subtract previous limbs with cy, set cy
+ stu 11,4(3) #
+ bdn Loop # decrement CTR and loop back
+
+Lend: sfe 11,10,9 # subtract limbs with cy, set cy
+ st 7,4(3) #
+ st 11,8(3) #
+ sfe 3,0,0 # load !cy into ...
+ sfi 3,3,0 # ... return value register
+ br
diff --git a/rts/gmp/mpn/power/submul_1.s b/rts/gmp/mpn/power/submul_1.s
new file mode 100644
index 0000000000..bc01b7c95d
--- /dev/null
+++ b/rts/gmp/mpn/power/submul_1.s
@@ -0,0 +1,127 @@
+# IBM POWER __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+# The POWER architecture has no unsigned 32x32->64 bit multiplication
+# instruction. To obtain that operation, we have to use the 32x32->64 signed
+# multiplication instruction, and add the appropriate compensation to the high
+# limb of the result. We add the multiplicand if the multiplier has its most
+# significant bit set, and we add the multiplier if the multiplicand has its
+# most significant bit set. We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work). Since the POWER architecture has a branch unit we
+# can branch in zero cycles, so that's how we perform the additions.
+
+ .toc
+ .globl __gmpn_submul_1
+ .globl .__gmpn_submul_1
+ .csect __gmpn_submul_1[DS]
+__gmpn_submul_1:
+ .long .__gmpn_submul_1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_submul_1:
+
+ cal 3,-4(3)
+ l 0,0(4)
+ cmpi 0,6,0
+ mtctr 5
+ mul 9,0,6
+ srai 7,0,31
+ and 7,7,6
+ mfmq 11
+ cax 9,9,7
+ l 7,4(3)
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ blt Lneg
+Lpos: bdz Lend
+
+Lploop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 0
+ ae 11,0,9 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ aze 10,10 # propagate cy to new cy_limb
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Lp0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Lp0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 0
+ ae 11,0,10
+ l 7,4(3)
+ aze 9,9
+ sf 8,11,7
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Lp1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Lp1: bdn Lploop
+
+ b Lend
+
+Lneg: cax 9,9,0
+ bdz Lend
+Lnloop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 7
+ ae 11,7,9
+ l 7,4(3)
+ ae 10,10,0 # propagate cy to new cy_limb
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Ln0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Ln0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 7
+ ae 11,7,10
+ l 7,4(3)
+ ae 9,9,0 # propagate cy to new cy_limb
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Ln1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Ln1: bdn Lnloop
+ b Lend
+
+Lend0: cal 9,0(10)
+Lend: st 8,4(3)
+ aze 3,9
+ br
diff --git a/rts/gmp/mpn/power/umul.s b/rts/gmp/mpn/power/umul.s
new file mode 100644
index 0000000000..8c77496380
--- /dev/null
+++ b/rts/gmp/mpn/power/umul.s
@@ -0,0 +1,38 @@
+# Copyright (C) 1999 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+ .toc
+ .globl __umul_ppmm
+ .globl .__umul_ppmm
+ .csect __umul_ppmm[DS]
+__umul_ppmm:
+ .long .__umul_ppmm, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__umul_ppmm:
+ mul 9,4,5
+ srai 0,4,31
+ and 0,0,5
+ srai 5,5,31
+ and 5,5,4
+ cax 0,0,5
+ mfmq 11
+ st 11,0(3)
+ cax 3,9,0
+ br
diff --git a/rts/gmp/mpn/powerpc32/add_n.asm b/rts/gmp/mpn/powerpc32/add_n.asm
new file mode 100644
index 0000000000..81ed04b162
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/add_n.asm
@@ -0,0 +1,61 @@
+dnl PowerPC-32 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ mtctr r6 C copy size into CTR
+ addic r0,r0,0 C clear cy
+ lwz r8,0(r4) C load least significant s1 limb
+ lwz r0,0(r5) C load least significant s2 limb
+ addi r3,r3,-4 C offset res_ptr, it's updated before it's used
+ bdz .Lend C If done, skip loop
+.Loop: lwz r9,4(r4) C load s1 limb
+ lwz r10,4(r5) C load s2 limb
+ adde r7,r0,r8 C add limbs with cy, set cy
+ stw r7,4(r3) C store result limb
+ bdz .Lexit C decrement CTR and exit if done
+ lwzu r8,8(r4) C load s1 limb and update s1_ptr
+ lwzu r0,8(r5) C load s2 limb and update s2_ptr
+ adde r7,r10,r9 C add limbs with cy, set cy
+ stwu r7,8(r3) C store result limb and update res_ptr
+ bdnz .Loop C decrement CTR and loop back
+
+.Lend: adde r7,r0,r8
+ stw r7,4(r3) C store ultimate result limb
+ li r3,0 C load cy into ...
+ addze r3,r3 C ... return value register
+ blr
+.Lexit: adde r7,r10,r9
+ stw r7,8(r3)
+ li r3,0 C load cy into ...
+ addze r3,r3 C ... return value register
+ blr
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/powerpc32/addmul_1.asm b/rts/gmp/mpn/powerpc32/addmul_1.asm
new file mode 100644
index 0000000000..3ef75b1532
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/addmul_1.asm
@@ -0,0 +1,124 @@
+dnl PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603
+dnl or PPC750 since I don't have access to any such machines.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ cmpi cr0,r5,9 C more than 9 limbs?
+ bgt cr0,.Lbig C branch if more than 9 limbs
+
+ mtctr r5
+ lwz r0,0(r4)
+ mullw r7,r0,r6
+ mulhwu r10,r0,r6
+ lwz r9,0(r3)
+ addc r8,r7,r9
+ addi r3,r3,-4
+ bdz .Lend
+.Lloop:
+ lwzu r0,4(r4)
+ stwu r8,4(r3)
+ mullw r8,r0,r6
+ adde r7,r8,r10
+ mulhwu r10,r0,r6
+ lwz r9,4(r3)
+ addze r10,r10
+ addc r8,r7,r9
+ bdnz .Lloop
+.Lend: stw r8,4(r3)
+ addze r3,r10
+ blr
+
+.Lbig: stmw r30,-32(r1)
+ addi r5,r5,-1
+ srwi r0,r5,2
+ mtctr r0
+
+ lwz r7,0(r4)
+ mullw r8,r7,r6
+ mulhwu r0,r7,r6
+ lwz r7,0(r3)
+ addc r8,r8,r7
+ stw r8,0(r3)
+
+.LloopU:
+ lwz r7,4(r4)
+ lwz r12,8(r4)
+ lwz r30,12(r4)
+ lwzu r31,16(r4)
+ mullw r8,r7,r6
+ mullw r9,r12,r6
+ mullw r10,r30,r6
+ mullw r11,r31,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ adde r9,r9,r0
+ mulhwu r0,r12,r6
+ lwz r12,8(r3)
+ adde r10,r10,r0
+ mulhwu r0,r30,r6
+ lwz r30,12(r3)
+ adde r11,r11,r0
+ mulhwu r0,r31,r6
+ lwz r31,16(r3)
+ addze r0,r0 C new cy_limb
+ addc r8,r8,r7
+ stw r8,4(r3)
+ adde r9,r9,r12
+ stw r9,8(r3)
+ adde r10,r10,r30
+ stw r10,12(r3)
+ adde r11,r11,r31
+ stwu r11,16(r3)
+ bdnz .LloopU
+
+ andi. r31,r5,3
+ mtctr r31
+ beq cr0,.Lendx
+
+.LloopE:
+ lwzu r7,4(r4)
+ mullw r8,r7,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ addze r0,r0 C new cy_limb
+ addc r8,r8,r7
+ stwu r8,4(r3)
+ bdnz .LloopE
+.Lendx:
+ addze r3,r0
+ lmw r30,-32(r1)
+ blr
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/powerpc32/aix.m4 b/rts/gmp/mpn/powerpc32/aix.m4
new file mode 100644
index 0000000000..2bd8425817
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/aix.m4
@@ -0,0 +1,39 @@
+divert(-1)
+dnl m4 macros for AIX 32-bit assembly.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+define(`ASM_START',
+ `.toc')
+
+define(`PROLOGUE',
+ `
+ .globl $1
+ .globl .$1
+ .csect $1[DS],2
+$1:
+ .long .$1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.$1:')
+
+define(`EPILOGUE', `')
+
+divert
diff --git a/rts/gmp/mpn/powerpc32/gmp-mparam.h b/rts/gmp/mpn/powerpc32/gmp-mparam.h
new file mode 100644
index 0000000000..b283185789
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/gmp-mparam.h
@@ -0,0 +1,66 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the 604. Presumably, these should be considerably
+ different for the 603 and 750 that have much slower multiply
+ instructions. */
+
+/* Generated by tuneup.c, 2000-05-26. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 26 /* tuneup says 20 */
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 228
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 46 /* tuneup says 44 */
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 262
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 52
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 86
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 23
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 7
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 53
+#endif
diff --git a/rts/gmp/mpn/powerpc32/lshift.asm b/rts/gmp/mpn/powerpc32/lshift.asm
new file mode 100644
index 0000000000..73a85430ab
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/lshift.asm
@@ -0,0 +1,145 @@
+dnl PowerPC-32 mpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1995, 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ cmpi cr0,r5,12 C more than 12 limbs?
+ slwi r0,r5,2
+ add r4,r4,r0 C make r4 point at end of s1
+ add r7,r3,r0 C make r7 point at end of res
+ bgt .LBIG C branch if more than 12 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8,r6,32
+ lwzu r11,-4(r4) C load first s1 limb
+ srw r3,r11,r8 C compute function return value
+ bdz .Lend1
+
+.Loop: lwzu r10,-4(r4)
+ slw r9,r11,r6
+ srw r12,r10,r8
+ or r9,r9,r12
+ stwu r9,-4(r7)
+ bdz .Lend2
+ lwzu r11,-4(r4)
+ slw r9,r10,r6
+ srw r12,r11,r8
+ or r9,r9,r12
+ stwu r9,-4(r7)
+ bdnz .Loop
+
+.Lend1: slw r0,r11,r6
+ stw r0,-4(r7)
+ blr
+.Lend2: slw r0,r10,r6
+ stw r0,-4(r7)
+ blr
+
+.LBIG:
+ stmw r24,-32(r1) C save registers we are supposed to preserve
+ lwzu r9,-4(r4)
+ subfic r8,r6,32
+ srw r3,r9,r8 C compute function return value
+ slw r0,r9,r6
+ addi r5,r5,-1
+
+ andi. r10,r5,3 C count for spill loop
+ beq .Le
+ mtctr r10
+ lwzu r28,-4(r4)
+ bdz .Lxe0
+
+.Loop0: slw r12,r28,r6
+ srw r24,r28,r8
+ lwzu r28,-4(r4)
+ or r24,r0,r24
+ stwu r24,-4(r7)
+ mr r0,r12
+ bdnz .Loop0 C taken at most once!
+
+.Lxe0: slw r12,r28,r6
+ srw r24,r28,r8
+ or r24,r0,r24
+ stwu r24,-4(r7)
+ mr r0,r12
+
+.Le: srwi r5,r5,2 C count for unrolled loop
+ addi r5,r5,-1
+ mtctr r5
+ lwz r28,-4(r4)
+ lwz r29,-8(r4)
+ lwz r30,-12(r4)
+ lwzu r31,-16(r4)
+
+.LoopU: slw r9,r28,r6
+ srw r24,r28,r8
+ lwz r28,-4(r4)
+ slw r10,r29,r6
+ srw r25,r29,r8
+ lwz r29,-8(r4)
+ slw r11,r30,r6
+ srw r26,r30,r8
+ lwz r30,-12(r4)
+ slw r12,r31,r6
+ srw r27,r31,r8
+ lwzu r31,-16(r4)
+ or r24,r0,r24
+ stw r24,-4(r7)
+ or r25,r9,r25
+ stw r25,-8(r7)
+ or r26,r10,r26
+ stw r26,-12(r7)
+ or r27,r11,r27
+ stwu r27,-16(r7)
+ mr r0,r12
+ bdnz .LoopU
+
+ slw r9,r28,r6
+ srw r24,r28,r8
+ slw r10,r29,r6
+ srw r25,r29,r8
+ slw r11,r30,r6
+ srw r26,r30,r8
+ slw r12,r31,r6
+ srw r27,r31,r8
+ or r24,r0,r24
+ stw r24,-4(r7)
+ or r25,r9,r25
+ stw r25,-8(r7)
+ or r26,r10,r26
+ stw r26,-12(r7)
+ or r27,r11,r27
+ stwu r27,-16(r7)
+ mr r0,r12
+
+ stw r0,-4(r7)
+ lmw r24,-32(r1) C restore registers
+ blr
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/powerpc32/mul_1.asm b/rts/gmp/mpn/powerpc32/mul_1.asm
new file mode 100644
index 0000000000..ec878b54d5
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/mul_1.asm
@@ -0,0 +1,86 @@
+dnl PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl This is optimized for the PPC604 but it runs decently even on PPC601. It
+dnl has not been tested on a PPC603 since I don't have access to any such
+dnl machines.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ mtctr r5
+ addi r3,r3,-4 C adjust res_ptr, it's offset before it's used
+ li r12,0 C clear upper product reg
+ addic r0,r0,0 C clear cy
+C Start software pipeline
+ lwz r8,0(r4)
+ bdz .Lend3
+ stmw r30,-8(r1) C save registers we are supposed to preserve
+ lwzu r9,4(r4)
+ mullw r11,r8,r6
+ mulhwu r0,r8,r6
+ bdz .Lend1
+C Software pipelined main loop
+.Loop: lwz r8,4(r4)
+ mullw r10,r9,r6
+ adde r30,r11,r12
+ mulhwu r12,r9,r6
+ stw r30,4(r3)
+ bdz .Lend2
+ lwzu r9,8(r4)
+ mullw r11,r8,r6
+ adde r31,r10,r0
+ mulhwu r0,r8,r6
+ stwu r31,8(r3)
+ bdnz .Loop
+C Finish software pipeline
+.Lend1: mullw r10,r9,r6
+ adde r30,r11,r12
+ mulhwu r12,r9,r6
+ stw r30,4(r3)
+ adde r31,r10,r0
+ stwu r31,8(r3)
+ addze r3,r12
+ lmw r30,-8(r1) C restore registers from stack
+ blr
+.Lend2: mullw r11,r8,r6
+ adde r31,r10,r0
+ mulhwu r0,r8,r6
+ stwu r31,8(r3)
+ adde r30,r11,r12
+ stw r30,4(r3)
+ addze r3,r0
+ lmw r30,-8(r1) C restore registers from stack
+ blr
+.Lend3: mullw r11,r8,r6
+ stw r11,4(r3)
+ mulhwu r3,r8,r6
+ blr
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/powerpc32/regmap.m4 b/rts/gmp/mpn/powerpc32/regmap.m4
new file mode 100644
index 0000000000..978f18902a
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/regmap.m4
@@ -0,0 +1,34 @@
+divert(-1)
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Map register names r0, r1, etc, to just `0', `1', etc.
+dnl This is needed on all systems but NeXT, Rhapsody, and MacOS-X
+forloop(i,0,31,
+`define(`r'i,i)'
+)
+
+dnl Likewise for cr0, cr1, etc.
+forloop(i,0,7,
+`define(`cr'i,i)'
+)
+
+divert
diff --git a/rts/gmp/mpn/powerpc32/rshift.asm b/rts/gmp/mpn/powerpc32/rshift.asm
new file mode 100644
index 0000000000..a09ba04938
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/rshift.asm
@@ -0,0 +1,60 @@
+dnl PowerPC-32 mpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ mtctr r5 C copy size into CTR
+ addi r7,r3,-4 C move adjusted res_ptr to free return reg
+ subfic r8,r6,32
+ lwz r11,0(r4) C load first s1 limb
+ slw r3,r11,r8 C compute function return value
+ bdz .Lend1
+
+.Loop: lwzu r10,4(r4)
+ srw r9,r11,r6
+ slw r12,r10,r8
+ or r9,r9,r12
+ stwu r9,4(r7)
+ bdz .Lend2
+ lwzu r11,4(r4)
+ srw r9,r10,r6
+ slw r12,r11,r8
+ or r9,r9,r12
+ stwu r9,4(r7)
+ bdnz .Loop
+
+.Lend1: srw r0,r11,r6
+ stw r0,4(r7)
+ blr
+
+.Lend2: srw r0,r10,r6
+ stw r0,4(r7)
+ blr
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/powerpc32/sub_n.asm b/rts/gmp/mpn/powerpc32/sub_n.asm
new file mode 100644
index 0000000000..b04b4192ef
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/sub_n.asm
@@ -0,0 +1,61 @@
+dnl PowerPC-32 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl and store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ mtctr r6 C copy size into CTR
+ addic r0,r6,-1 C set cy
+ lwz r8,0(r4) C load least significant s1 limb
+ lwz r0,0(r5) C load least significant s2 limb
+ addi r3,r3,-4 C offset res_ptr, it's updated before it's used
+ bdz .Lend C If done, skip loop
+.Loop: lwz r9,4(r4) C load s1 limb
+ lwz r10,4(r5) C load s2 limb
+ subfe r7,r0,r8 C subtract limbs with cy, set cy
+ stw r7,4(r3) C store result limb
+ bdz .Lexit C decrement CTR and exit if done
+ lwzu r8,8(r4) C load s1 limb and update s1_ptr
+ lwzu r0,8(r5) C load s2 limb and update s2_ptr
+ subfe r7,r10,r9 C subtract limbs with cy, set cy
+ stwu r7,8(r3) C store result limb and update res_ptr
+ bdnz .Loop C decrement CTR and loop back
+
+.Lend: subfe r7,r0,r8
+ stw r7,4(r3) C store ultimate result limb
+ subfe r3,r0,r0 C load !cy into ...
+ subfic r3,r3,0 C ... return value register
+ blr
+.Lexit: subfe r7,r10,r9
+ stw r7,8(r3)
+ subfe r3,r0,r0 C load !cy into ...
+ subfic r3,r3,0 C ... return value register
+ blr
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/powerpc32/submul_1.asm b/rts/gmp/mpn/powerpc32/submul_1.asm
new file mode 100644
index 0000000000..a129e9f9ea
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/submul_1.asm
@@ -0,0 +1,130 @@
+dnl PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603
+dnl or PPC750 since I don't have access to any such machines.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ cmpi cr0,r5,9 C more than 9 limbs?
+ bgt cr0,.Lbig C branch if more than 9 limbs
+
+ mtctr r5
+ lwz r0,0(r4)
+ mullw r7,r0,r6
+ mulhwu r10,r0,r6
+ lwz r9,0(r3)
+ subfc r8,r7,r9
+ addc r7,r7,r8 C invert cy (r7 is junk)
+ addi r3,r3,-4
+ bdz .Lend
+.Lloop:
+ lwzu r0,4(r4)
+ stwu r8,4(r3)
+ mullw r8,r0,r6
+ adde r7,r8,r10
+ mulhwu r10,r0,r6
+ lwz r9,4(r3)
+ addze r10,r10
+ subfc r8,r7,r9
+ addc r7,r7,r8 C invert cy (r7 is junk)
+ bdnz .Lloop
+.Lend: stw r8,4(r3)
+ addze r3,r10
+ blr
+
+.Lbig: stmw r30,-32(r1)
+ addi r5,r5,-1
+ srwi r0,r5,2
+ mtctr r0
+
+ lwz r7,0(r4)
+ mullw r8,r7,r6
+ mulhwu r0,r7,r6
+ lwz r7,0(r3)
+ subfc r7,r8,r7
+ addc r8,r8,r7
+ stw r7,0(r3)
+
+.LloopU:
+ lwz r7,4(r4)
+ lwz r12,8(r4)
+ lwz r30,12(r4)
+ lwzu r31,16(r4)
+ mullw r8,r7,r6
+ mullw r9,r12,r6
+ mullw r10,r30,r6
+ mullw r11,r31,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ adde r9,r9,r0
+ mulhwu r0,r12,r6
+ lwz r12,8(r3)
+ adde r10,r10,r0
+ mulhwu r0,r30,r6
+ lwz r30,12(r3)
+ adde r11,r11,r0
+ mulhwu r0,r31,r6
+ lwz r31,16(r3)
+ addze r0,r0 C new cy_limb
+ subfc r7,r8,r7
+ stw r7,4(r3)
+ subfe r12,r9,r12
+ stw r12,8(r3)
+ subfe r30,r10,r30
+ stw r30,12(r3)
+ subfe r31,r11,r31
+ stwu r31,16(r3)
+ subfe r11,r11,r11 C invert ...
+ addic r11,r11,1 C ... carry
+ bdnz .LloopU
+
+ andi. r31,r5,3
+ mtctr r31
+ beq cr0,.Lendx
+
+.LloopE:
+ lwzu r7,4(r4)
+ mullw r8,r7,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ addze r0,r0 C new cy_limb
+ subfc r7,r8,r7
+ addc r8,r8,r7
+ stwu r7,4(r3)
+ bdnz .LloopE
+.Lendx:
+ addze r3,r0
+ lmw r30,-32(r1)
+ blr
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/powerpc32/umul.asm b/rts/gmp/mpn/powerpc32/umul.asm
new file mode 100644
index 0000000000..eeaa0a4dc8
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/umul.asm
@@ -0,0 +1,32 @@
+dnl PowerPC-32 umul_ppmm -- support for longlong.h
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ mullw 0,4,5
+ mulhwu 9,4,5
+ stw 0,0(3)
+ mr 3,9
+ blr
+EPILOGUE(mpn_umul_ppmm)
diff --git a/rts/gmp/mpn/powerpc64/README b/rts/gmp/mpn/powerpc64/README
new file mode 100644
index 0000000000..c779276917
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/README
@@ -0,0 +1,36 @@
+PPC630 (aka Power3) pipeline information:
+
+Decoding is 4-way and issue is 8-way with some out-of-order capability.
+LS1 - ld/st unit 1
+LS2 - ld/st unit 2
+FXU1 - integer unit 1, handles any simple integer instructions
+FXU2 - integer unit 2, handles any simple integer instructions
+FXU3 - integer unit 3, handles integer multiply and divide
+FPU1 - floating-point unit 1
+FPU2 - floating-point unit 2
+
+Memory: Any two memory operations can issue, but memory subsystem
+ can sustain just one store per cycle.
+Simple integer: 2 operations (such as add, rl*)
+Integer multiply: 1 operation every 9th cycle worst case; exact timing depends
+ on 2nd operand most significant bit position (10 bits per
+ cycle). Multiply unit is not pipelined, only one multiply
+ operation in progress is allowed.
+Integer divide: ?
+Floating-point: Any plain 2 arithmetic instructions (such as fmul, fadd, fmadd)
+ Latency = 4.
+Floating-point divide:
+ ?
+Floating-point square root:
+ ?
+
+Best possible times for the main loops:
+shift: 1.5 cycles limited by integer unit contention.
+ With 63 special loops, one for each shift count, we could
+ reduce the needed integer instructions to 2, which would
+ reduce the best possible time to 1 cycle.
+add/sub: 1.5 cycles, limited by ld/st unit contention.
+mul: 18 cycles (average) unless floating-point operations are used,
+ but that would only help for multiplies of perhaps 10 and more
+ limbs.
+addmul/submul:Same situation as for mul.
diff --git a/rts/gmp/mpn/powerpc64/add_n.asm b/rts/gmp/mpn/powerpc64/add_n.asm
new file mode 100644
index 0000000000..c3325376dc
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/add_n.asm
@@ -0,0 +1,61 @@
+# PowerPC-64 mpn_add_n -- Add two limb vectors of the same length > 0 and
+# store sum in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ mtctr r6 # copy size into CTR
+ addic r0,r0,0 # clear cy
+ ld r8,0(r4) # load least significant s1 limb
+ ld r0,0(r5) # load least significant s2 limb
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ bdz .Lend # If done, skip loop
+.Loop: ld r9,8(r4) # load s1 limb
+ ld r10,8(r5) # load s2 limb
+ adde r7,r0,r8 # add limbs with cy, set cy
+ std r7,8(r3) # store result limb
+ bdz .Lexit # decrement CTR and exit if done
+ ldu r8,16(r4) # load s1 limb and update s1_ptr
+ ldu r0,16(r5) # load s2 limb and update s2_ptr
+ adde r7,r10,r9 # add limbs with cy, set cy
+ stdu r7,16(r3) # store result limb and update res_ptr
+ bdnz .Loop # decrement CTR and loop back
+
+.Lend: adde r7,r0,r8
+ std r7,8(r3) # store ultimate result limb
+ li r3,0 # load cy into ...
+ addze r3,r3 # ... return value register
+ blr
+.Lexit: adde r7,r10,r9
+ std r7,16(r3)
+ li r3,0 # load cy into ...
+ addze r3,r3 # ... return value register
+ blr
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/powerpc64/addmul_1.asm b/rts/gmp/mpn/powerpc64/addmul_1.asm
new file mode 100644
index 0000000000..81774482fe
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addmul_1.asm
@@ -0,0 +1,52 @@
+# PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ mtctr 5
+ li 9,0 # cy_limb = 0
+ addic 0,0,0
+ cal 3,-8(3)
+ cal 4,-8(4)
+.Loop:
+ ldu 0,8(4)
+ ld 10,8(3)
+ mulld 7,0,6
+ adde 7,7,9
+ mulhdu 9,0,6
+ addze 9,9
+ addc 7,7,10
+ stdu 7,8(3)
+ bdnz .Loop
+
+ addze 3,9
+ blr
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/powerpc64/addsub_n.asm b/rts/gmp/mpn/powerpc64/addsub_n.asm
new file mode 100644
index 0000000000..4ed40d71ae
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addsub_n.asm
@@ -0,0 +1,107 @@
+# PowerPC-64 mpn_addsub_n -- Simultaneous add and sub.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`asm-syntax.m4')
+
+define(SAVE_BORROW_RESTORE_CARRY,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+define(SAVE_CARRY_RESTORE_BORROW,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+
+# 19991117
+
+# This is just crafted for testing some ideas, and verifying that we can make
+# it run fast. It runs at 2.55 cycles/limb on the 630, which is very good.
+# We should play a little with the schedule. No time has been spent on that.
+
+# To finish this, the loop warm up and cool down code needs to be written,
+# and the result need to be tested. Also, the proper calling sequence should
+# be used.
+
+# r1p r2p s1p s2p n
+# Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
+
+ASM_START()
+PROLOGUE(mpn_addsub_n)
+ std r14,-64(1)
+ std r15,-56(1)
+ std r16,-48(1)
+ std r17,-40(1)
+ std r18,-32(1)
+ std r19,-24(1)
+
+ srdi r7,r7,2
+ mtctr r7 # copy size into CTR
+ addic r0,r0,0 # clear cy
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ addi r4,r4,-8 # offset res_ptr, it's updated before it's used
+
+.Loop:
+ adde r12,r8,r9
+ std r12,8(r3)
+ adde r12,r10,r11
+ std r12,16(r3)
+
+ SAVE_CARRY_RESTORE_BORROW(r0)
+
+ subfe r12,r8,r9
+ std r12,8(r4)
+ ld r8,8(r5) # s1 L 1
+ ld r9,8(r6) # s2 L 1
+ subfe r12,r10,r11
+ std r12,16(r4)
+ ld r10,16(r5) # s1 L 2
+ ld r11,16(r6) # s2 L 2
+# pair -------------------------
+ subfe r12,r14,r15
+ std r12,24(r4)
+ subfe r12,r16,r17
+ stdu r12,32(r4)
+
+ SAVE_BORROW_RESTORE_CARRY(r0)
+
+ adde r12,r14,r15
+ std r12,24(r3)
+ ld r14,24(r5) # s1 L 3
+ ld r15,24(r6) # s2 L 3
+ adde r12,r16,r17
+ stdu r12,32(r3)
+ ldu r16,32(r5) # s1 L 4
+ ldu r17,32(r6) # s2 L 4
+ bdnz .Loop
+
+ ld r14,-64(1)
+ ld r15,-56(1)
+ ld r16,-48(1)
+ ld r17,-40(1)
+ ld r18,-32(1)
+ ld r19,-24(1)
+ blr
+EPILOGUE(mpn_addsub_n)
diff --git a/rts/gmp/mpn/powerpc64/aix.m4 b/rts/gmp/mpn/powerpc64/aix.m4
new file mode 100644
index 0000000000..aee9f1f97a
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/aix.m4
@@ -0,0 +1,40 @@
+divert(-1)
+dnl m4 macros for AIX 64-bit assembly.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+define(`ASM_START',
+ `.machine "ppc64"
+ .toc')
+
+define(`PROLOGUE',
+ `
+ .globl $1
+ .globl .$1
+ .csect $1[DS],3
+$1:
+ .llong .$1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.$1:')
+
+define(`EPILOGUE', `')
+
+divert
diff --git a/rts/gmp/mpn/powerpc64/copyd.asm b/rts/gmp/mpn/powerpc64/copyd.asm
new file mode 100644
index 0000000000..d06e8c25fd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/copyd.asm
@@ -0,0 +1,45 @@
+# PowerPC-64 mpn_copyd -- Copy a limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr r3
+# sptr r4
+# n r5
+
+include(`../config.m4')
+
+# Unrolling this analogous to sparc64/copyi.s doesn't help for any
+# operand sizes.
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+ cmpdi cr0,r5,0
+ mtctr r5
+ sldi r5,r5,3
+ add r4,r4,r5
+ add r3,r3,r5
+ beq cr0,.Lend
+.Loop: ldu r0,-8(r4)
+ stdu r0,-8(r3)
+ bdnz .Loop
+.Lend: blr
+EPILOGUE(mpn_copyd)
diff --git a/rts/gmp/mpn/powerpc64/copyi.asm b/rts/gmp/mpn/powerpc64/copyi.asm
new file mode 100644
index 0000000000..a1bedc4c5b
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/copyi.asm
@@ -0,0 +1,44 @@
+# PowerPC-64 mpn_copyi -- Copy a limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr r3
+# sptr r4
+# n r5
+
+include(`../config.m4')
+
+# Unrolling this analogous to sparc64/copyi.s doesn't help for any
+# operand sizes.
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+ cmpdi cr0,r5,0
+ mtctr r5
+ addi r4,r4,-8
+ addi r3,r3,-8
+ beq cr0,.Lend
+.Loop: ldu r0,8(r4)
+ stdu r0,8(r3)
+ bdnz .Loop
+.Lend: blr
+EPILOGUE(mpn_copyi)
diff --git a/rts/gmp/mpn/powerpc64/gmp-mparam.h b/rts/gmp/mpn/powerpc64/gmp-mparam.h
new file mode 100644
index 0000000000..6fefb960cd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Generated by tuneup.c, 2000-07-16. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 10
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 57
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 16
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 89
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 28
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 216
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 14
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 6
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 163
+#endif
diff --git a/rts/gmp/mpn/powerpc64/lshift.asm b/rts/gmp/mpn/powerpc64/lshift.asm
new file mode 100644
index 0000000000..cef3a81fdd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/lshift.asm
@@ -0,0 +1,159 @@
+# PowerPC-64 mpn_lshift -- Shift a number left.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ cmpdi cr0,r5,20 # more than 20 limbs?
+ sldi r0,r5,3
+ add r4,r4,r0 # make r4 point at end of s1
+ add r7,r3,r0 # make r7 point at end of res
+ bgt .LBIG # branch if more than 12 limbs
+
+ mtctr r5 # copy size into CTR
+ subfic r8,r6,64
+ ldu r11,-8(r4) # load first s1 limb
+ srd r3,r11,r8 # compute function return value
+ bdz .Lend1
+
+.Loop: ldu r10,-8(r4)
+ sld r9,r11,r6
+ srd r12,r10,r8
+ or r9,r9,r12
+ stdu r9,-8(r7)
+ bdz .Lend2
+ ldu r11,-8(r4)
+ sld r9,r10,r6
+ srd r12,r11,r8
+ or r9,r9,r12
+ stdu r9,-8(r7)
+ bdnz .Loop
+
+.Lend1: sld r0,r11,r6
+ std r0,-8(r7)
+ blr
+.Lend2: sld r0,r10,r6
+ std r0,-8(r7)
+ blr
+
+.LBIG:
+ std r24,-64(1)
+ std r25,-56(1)
+ std r26,-48(1)
+ std r27,-40(1)
+ std r28,-32(1)
+ std r29,-24(1)
+ std r30,-16(1)
+ std r31,-8(1)
+ ldu r9,-8(r4)
+ subfic r8,r6,64
+ srd r3,r9,r8 # compute function return value
+ sld r0,r9,r6
+ addi r5,r5,-1
+
+ andi. r10,r5,3 # count for spill loop
+ beq .Le
+ mtctr r10
+ ldu r28,-8(r4)
+ bdz .Lxe0
+
+.Loop0: sld r12,r28,r6
+ srd r24,r28,r8
+ ldu r28,-8(r4)
+ or r24,r0,r24
+ stdu r24,-8(r7)
+ mr r0,r12
+ bdnz .Loop0 # taken at most once!
+
+.Lxe0: sld r12,r28,r6
+ srd r24,r28,r8
+ or r24,r0,r24
+ stdu r24,-8(r7)
+ mr r0,r12
+
+.Le: srdi r5,r5,2 # count for unrolled loop
+ addi r5,r5,-1
+ mtctr r5
+ ld r28,-8(r4)
+ ld r29,-16(r4)
+ ld r30,-24(r4)
+ ldu r31,-32(r4)
+
+.LoopU: sld r9,r28,r6
+ srd r24,r28,r8
+ ld r28,-8(r4)
+ sld r10,r29,r6
+ srd r25,r29,r8
+ ld r29,-16(r4)
+ sld r11,r30,r6
+ srd r26,r30,r8
+ ld r30,-24(r4)
+ sld r12,r31,r6
+ srd r27,r31,r8
+ ldu r31,-32(r4)
+ or r24,r0,r24
+ std r24,-8(r7)
+ or r25,r9,r25
+ std r25,-16(r7)
+ or r26,r10,r26
+ std r26,-24(r7)
+ or r27,r11,r27
+ stdu r27,-32(r7)
+ mr r0,r12
+ bdnz .LoopU
+
+ sld r9,r28,r6
+ srd r24,r28,r8
+ sld r10,r29,r6
+ srd r25,r29,r8
+ sld r11,r30,r6
+ srd r26,r30,r8
+ sld r12,r31,r6
+ srd r27,r31,r8
+ or r24,r0,r24
+ std r24,-8(r7)
+ or r25,r9,r25
+ std r25,-16(r7)
+ or r26,r10,r26
+ std r26,-24(r7)
+ or r27,r11,r27
+ stdu r27,-32(r7)
+ mr r0,r12
+
+ std r0,-8(r7)
+ ld r24,-64(1)
+ ld r25,-56(1)
+ ld r26,-48(1)
+ ld r27,-40(1)
+ ld r28,-32(1)
+ ld r29,-24(1)
+ ld r30,-16(1)
+ ld r31,-8(1)
+ blr
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/powerpc64/mul_1.asm b/rts/gmp/mpn/powerpc64/mul_1.asm
new file mode 100644
index 0000000000..47597283ff
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/mul_1.asm
@@ -0,0 +1,49 @@
+# PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ mtctr 5
+ li 9,0 # cy_limb = 0
+ addic 0,0,0
+ cal 3,-8(3)
+ cal 4,-8(4)
+.Loop:
+ ldu 0,8(4)
+ mulld 7,0,6
+ adde 7,7,9
+ mulhdu 9,0,6
+ stdu 7,8(3)
+ bdnz .Loop
+
+ addze 3,9
+ blr
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/powerpc64/rshift.asm b/rts/gmp/mpn/powerpc64/rshift.asm
new file mode 100644
index 0000000000..88272c7fa9
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/rshift.asm
@@ -0,0 +1,60 @@
+# PowerPC-64 mpn_rshift -- Shift a number right.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ mtctr r5 # copy size into CTR
+ addi r7,r3,-8 # move adjusted res_ptr to free return reg
+ subfic r8,r6,64
+ ld r11,0(r4) # load first s1 limb
+ sld r3,r11,r8 # compute function return value
+ bdz .Lend1
+
+.Loop: ldu r10,8(r4)
+ srd r9,r11,r6
+ sld r12,r10,r8
+ or r9,r9,r12
+ stdu r9,8(r7)
+ bdz .Lend2
+ ldu r11,8(r4)
+ srd r9,r10,r6
+ sld r12,r11,r8
+ or r9,r9,r12
+ stdu r9,8(r7)
+ bdnz .Loop
+
+.Lend1: srd r0,r11,r6
+ std r0,8(r7)
+ blr
+
+.Lend2: srd r0,r10,r6
+ std r0,8(r7)
+ blr
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/powerpc64/sub_n.asm b/rts/gmp/mpn/powerpc64/sub_n.asm
new file mode 100644
index 0000000000..4de3de69c7
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/sub_n.asm
@@ -0,0 +1,61 @@
+# PowerPC-64 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+# and store difference in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.b
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ mtctr r6 # copy size into CTR
+ addic r0,r6,-1 # set cy
+ ld r8,0(r4) # load least significant s1 limb
+ ld r0,0(r5) # load least significant s2 limb
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ bdz .Lend # If done, skip loop
+.Loop: ld r9,8(r4) # load s1 limb
+ ld r10,8(r5) # load s2 limb
+ subfe r7,r0,r8 # subtract limbs with cy, set cy
+ std r7,8(r3) # store result limb
+ bdz .Lexit # decrement CTR and exit if done
+ ldu r8,16(r4) # load s1 limb and update s1_ptr
+ ldu r0,16(r5) # load s2 limb and update s2_ptr
+ subfe r7,r10,r9 # subtract limbs with cy, set cy
+ stdu r7,16(r3) # store result limb and update res_ptr
+ bdnz .Loop # decrement CTR and loop back
+
+.Lend: subfe r7,r0,r8
+ std r7,8(r3) # store ultimate result limb
+ subfe r3,r0,r0 # load !cy into ...
+ subfic r3,r3,0 # ... return value register
+ blr
+.Lexit: subfe r7,r10,r9
+ std r7,16(r3)
+ subfe r3,r0,r0 # load !cy into ...
+ subfic r3,r3,0 # ... return value register
+ blr
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/powerpc64/submul_1.asm b/rts/gmp/mpn/powerpc64/submul_1.asm
new file mode 100644
index 0000000000..17f6369a38
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/submul_1.asm
@@ -0,0 +1,54 @@
+# PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ mtctr 5
+ li 9,0 # cy_limb = 0
+ addic 0,0,0
+ cal 3,-8(3)
+ cal 4,-8(4)
+.Loop:
+ ldu 0,8(4)
+ ld 10,8(3)
+ mulld 7,0,6
+ adde 7,7,9
+ mulhdu 9,0,6
+ addze 9,9
+ subfc 7,7,10
+ stdu 7,8(3)
+ subfe 11,11,11 # invert ...
+ addic 11,11,1 # ... carry
+ bdnz .Loop
+
+ addze 3,9
+ blr
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/pyr/add_n.s b/rts/gmp/mpn/pyr/add_n.s
new file mode 100644
index 0000000000..e1fc535846
--- /dev/null
+++ b/rts/gmp/mpn/pyr/add_n.s
@@ -0,0 +1,76 @@
+# Pyramid __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ movw $-1,tr0 # representation for carry clear
+
+ movw pr3,tr2
+ andw $3,tr2
+ beq Lend0
+ subw tr2,pr3
+
+Loop0: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ addwc (pr2),tr1
+ movw tr1,(pr0)
+
+ subwb tr0,tr0
+ addw $4,pr0
+ addw $4,pr1
+ addw $4,pr2
+ addw $-1,tr2
+ bne Loop0
+
+ mtstw pr3,pr3
+ beq Lend
+Lend0:
+Loop: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ addwc (pr2),tr1
+ movw tr1,(pr0)
+
+ movw 4(pr1),tr1
+ addwc 4(pr2),tr1
+ movw tr1,4(pr0)
+
+ movw 8(pr1),tr1
+ addwc 8(pr2),tr1
+ movw tr1,8(pr0)
+
+ movw 12(pr1),tr1
+ addwc 12(pr2),tr1
+ movw tr1,12(pr0)
+
+ subwb tr0,tr0
+ addw $16,pr0
+ addw $16,pr1
+ addw $16,pr2
+ addw $-4,pr3
+ bne Loop
+Lend:
+ mnegw tr0,pr0
+ ret
diff --git a/rts/gmp/mpn/pyr/addmul_1.s b/rts/gmp/mpn/pyr/addmul_1.s
new file mode 100644
index 0000000000..65c3f8f008
--- /dev/null
+++ b/rts/gmp/mpn/pyr/addmul_1.s
@@ -0,0 +1,45 @@
+# Pyramid __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+ mova (pr0)[pr2*4],pr0
+ mova (pr1)[pr2*4],pr1
+ mnegw pr2,pr2
+ movw $0,tr3
+
+Loop: movw (pr1)[pr2*4],tr1
+ uemul pr3,tr0
+ addw tr3,tr1
+ movw $0,tr3
+ addwc tr0,tr3
+ movw (pr0)[pr2*0x4],tr0
+ addw tr0,tr1
+ addwc $0,tr3
+ movw tr1,(pr0)[pr2*4]
+ addw $1,pr2
+ bne Loop
+
+ movw tr3,pr0
+ ret
diff --git a/rts/gmp/mpn/pyr/mul_1.s b/rts/gmp/mpn/pyr/mul_1.s
new file mode 100644
index 0000000000..1272297c42
--- /dev/null
+++ b/rts/gmp/mpn/pyr/mul_1.s
@@ -0,0 +1,42 @@
+# Pyramid __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ mova (pr0)[pr2*4],pr0
+ mova (pr1)[pr2*4],pr1
+ mnegw pr2,pr2
+ movw $0,tr3
+
+Loop: movw (pr1)[pr2*4],tr1
+ uemul pr3,tr0
+ addw tr3,tr1
+ movw $0,tr3
+ addwc tr0,tr3
+ movw tr1,(pr0)[pr2*4]
+ addw $1,pr2
+ bne Loop
+
+ movw tr3,pr0
+ ret
diff --git a/rts/gmp/mpn/pyr/sub_n.s b/rts/gmp/mpn/pyr/sub_n.s
new file mode 100644
index 0000000000..1fd2eb0f17
--- /dev/null
+++ b/rts/gmp/mpn/pyr/sub_n.s
@@ -0,0 +1,76 @@
+# Pyramid __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ movw $-1,tr0 # representation for carry clear
+
+ movw pr3,tr2
+ andw $3,tr2
+ beq Lend0
+ subw tr2,pr3
+
+Loop0: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ subwb (pr2),tr1
+ movw tr1,(pr0)
+
+ subwb tr0,tr0
+ addw $4,pr0
+ addw $4,pr1
+ addw $4,pr2
+ addw $-1,tr2
+ bne Loop0
+
+ mtstw pr3,pr3
+ beq Lend
+Lend0:
+Loop: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ subwb (pr2),tr1
+ movw tr1,(pr0)
+
+ movw 4(pr1),tr1
+ subwb 4(pr2),tr1
+ movw tr1,4(pr0)
+
+ movw 8(pr1),tr1
+ subwb 8(pr2),tr1
+ movw tr1,8(pr0)
+
+ movw 12(pr1),tr1
+ subwb 12(pr2),tr1
+ movw tr1,12(pr0)
+
+ subwb tr0,tr0
+ addw $16,pr0
+ addw $16,pr1
+ addw $16,pr2
+ addw $-4,pr3
+ bne Loop
+Lend:
+ mnegw tr0,pr0
+ ret
diff --git a/rts/gmp/mpn/sh/add_n.s b/rts/gmp/mpn/sh/add_n.s
new file mode 100644
index 0000000000..df388b31a3
--- /dev/null
+++ b/rts/gmp/mpn/sh/add_n.s
@@ -0,0 +1,47 @@
+! SH __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! s2_ptr r6
+! size r7
+
+ .text
+ .align 2
+ .global ___gmpn_add_n
+___gmpn_add_n:
+ mov #0,r3 ! clear cy save reg
+
+Loop: mov.l @r5+,r1
+ mov.l @r6+,r2
+ shlr r3 ! restore cy
+ addc r2,r1
+ movt r3 ! save cy
+ mov.l r1,@r4
+ dt r7
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r3,r0 ! return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/sh/sh2/addmul_1.s b/rts/gmp/mpn/sh/sh2/addmul_1.s
new file mode 100644
index 0000000000..f34a7f0503
--- /dev/null
+++ b/rts/gmp/mpn/sh/sh2/addmul_1.s
@@ -0,0 +1,53 @@
+! SH2 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! size r6
+! s2_limb r7
+
+ .text
+ .align 1
+ .global ___gmpn_addmul_1
+___gmpn_addmul_1:
+ mov #0,r2 ! cy_limb = 0
+ mov #0,r0 ! Keep r0 = 0 for entire loop
+ clrt
+
+Loop: mov.l @r5+,r3
+ dmulu.l r3,r7
+ sts macl,r1
+ addc r2,r1 ! lo_prod += old cy_limb
+ sts mach,r2 ! new cy_limb = hi_prod
+ mov.l @r4,r3
+ addc r0,r2 ! cy_limb += T, T = 0
+ addc r3,r1
+ addc r0,r2 ! cy_limb += T, T = 0
+ dt r6
+ mov.l r1,@r4
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r2,r0
diff --git a/rts/gmp/mpn/sh/sh2/mul_1.s b/rts/gmp/mpn/sh/sh2/mul_1.s
new file mode 100644
index 0000000000..2a117a3175
--- /dev/null
+++ b/rts/gmp/mpn/sh/sh2/mul_1.s
@@ -0,0 +1,50 @@
+! SH2 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! size r6
+! s2_limb r7
+
+ .text
+ .align 1
+ .global ___gmpn_mul_1
+___gmpn_mul_1:
+ mov #0,r2 ! cy_limb = 0
+ mov #0,r0 ! Keep r0 = 0 for entire loop
+ clrt
+
+Loop: mov.l @r5+,r3
+ dmulu.l r3,r7
+ sts macl,r1
+ addc r2,r1
+ sts mach,r2
+ addc r0,r2 ! propagate carry to cy_limb (dt clobbers T)
+ dt r6
+ mov.l r1,@r4
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r2,r0
diff --git a/rts/gmp/mpn/sh/sh2/submul_1.s b/rts/gmp/mpn/sh/sh2/submul_1.s
new file mode 100644
index 0000000000..eb9a27dde3
--- /dev/null
+++ b/rts/gmp/mpn/sh/sh2/submul_1.s
@@ -0,0 +1,53 @@
+! SH2 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! size r6
+! s2_limb r7
+
+ .text
+ .align 1
+ .global ___gmpn_submul_1
+___gmpn_submul_1:
+ mov #0,r2 ! cy_limb = 0
+ mov #0,r0 ! Keep r0 = 0 for entire loop
+ clrt
+
+Loop: mov.l @r5+,r3
+ dmulu.l r3,r7
+ sts macl,r1
+ addc r2,r1 ! lo_prod += old cy_limb
+ sts mach,r2 ! new cy_limb = hi_prod
+ mov.l @r4,r3
+ addc r0,r2 ! cy_limb += T, T = 0
+ subc r3,r1
+ addc r0,r2 ! cy_limb += T, T = 0
+ dt r6
+ mov.l r1,@r4
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r2,r0
diff --git a/rts/gmp/mpn/sh/sub_n.s b/rts/gmp/mpn/sh/sub_n.s
new file mode 100644
index 0000000000..5f818c95a8
--- /dev/null
+++ b/rts/gmp/mpn/sh/sub_n.s
@@ -0,0 +1,47 @@
+! SH __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+! difference in a third limb vector.
+
+! Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! s2_ptr r6
+! size r7
+
+ .text
+ .align 2
+ .global ___gmpn_sub_n
+___gmpn_sub_n:
+ mov #0,r3 ! clear cy save reg
+
+Loop: mov.l @r5+,r1
+ mov.l @r6+,r2
+ shlr r3 ! restore cy
+ subc r2,r1
+ movt r3 ! save cy
+ mov.l r1,@r4
+ dt r7
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r3,r0 ! return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/sparc32/README b/rts/gmp/mpn/sparc32/README
new file mode 100644
index 0000000000..7c19df7bc4
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/README
@@ -0,0 +1,36 @@
+This directory contains mpn functions for various SPARC chips. Code that
+runs only on version 8 SPARC implementations, is in the v8 subdirectory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+ Load and Store timing
+
+On most early SPARC implementations, the ST instructions takes multiple
+cycles, while a STD takes just a single cycle more than an ST. For the CPUs
+in SPARCstation I and II, the times are 3 and 4 cycles, respectively.
+Therefore, combining two ST instrucitons into a STD when possible is a
+significant optimiation.
+
+Later SPARC implementations have single cycle ST.
+
+For SuperSPARC, we can perform just one memory instruction per cycle, even
+if up to two integer instructions can be executed in its pipeline. For
+programs that perform so many memory operations that there are not enough
+non-memory operations to issue in parallel with all memory operations, using
+LDD and STD when possible helps.
+
+STATUS
+
+1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5
+ cycles/limb asymptotically. We could optimize speed for special counts
+ by using ADDXCC.
+
+2. On a SuperSPARC, mpn_add_n and mpn_sub_n runs at 2.5 cycles/limb, or 2
+ cycles/limb asymptotically.
+
+3. mpn_mul_1 runs at what is believed to be optimal speed.
+
+4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a
+ cycle by avoiding one of the add instrucitons. See a29k/addmul_1.
+
+The speed of the code for other SPARC implementations is uncertain.
diff --git a/rts/gmp/mpn/sparc32/add_n.asm b/rts/gmp/mpn/sparc32/add_n.asm
new file mode 100644
index 0000000000..5f1d00c0e0
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/add_n.asm
@@ -0,0 +1,236 @@
+dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) C branch if alignment differs
+ nop
+C ** V1a **
+L(0): andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc n,-10,n
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1):
+ addcc n,8-2,n
+ blt L(end1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1):
+ addxcc %g4,%g2,%o4
+ addxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+C ** V1b **
+ mov s2_ptr,%g1
+ mov s1_ptr,s2_ptr
+ b L(0)
+ mov %g1,s1_ptr
+
+C ** V2 **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ. Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2): cmp n,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
+ be L(v2) C if no, branch
+ nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ blt L(fin2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin2):
+ addcc n,8-2,n
+ blt L(end2)
+ subcc %g0,%o4,%g0 C restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 C restore cy
+L(end2):
+ andcc n,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/sparc32/addmul_1.asm b/rts/gmp/mpn/sparc32/addmul_1.asm
new file mode 100644
index 0000000000..80c94e4251
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/addmul_1.asm
@@ -0,0 +1,146 @@
+dnl SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ addcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ addcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/sparc32/lshift.asm b/rts/gmp/mpn/sparc32/lshift.asm
new file mode 100644
index 0000000000..529733ac2d
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/lshift.asm
@@ -0,0 +1,97 @@
+dnl SPARC mpn_lshift -- Shift a number left.
+dnl
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr %o0
+C src_ptr %o1
+C size %o2
+C cnt %o3
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ sll %o2,2,%g1
+ add %o1,%g1,%o1 C make %o1 point at end of src
+ ld [%o1-4],%g2 C load first limb
+ sub %g0,%o3,%o5 C negate shift count
+ add %o0,%g1,%o0 C make %o0 point at end of res
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 C number of limbs in first loop
+ srl %g2,%o5,%g1 C compute function result
+ be L(0) C if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 C adjust count for main loop
+
+L(loop0):
+ ld [%o1-8],%g3
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ addcc %g4,-1,%g4
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0+0]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1-8],%g3
+ add %o0,-16,%o0
+ addcc %o2,-4,%o2
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+
+ ld [%o1-12],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+12]
+ srl %g2,%o5,%g1
+
+ ld [%o1-16],%g3
+ sll %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0+8]
+ srl %g3,%o5,%g1
+
+ ld [%o1-20],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+4]
+ srl %g2,%o5,%g1
+
+ add %o1,-16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0+0]
+
+L(end): sll %g2,%o3,%g2
+ st %g2,[%o0-4]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/sparc32/mul_1.asm b/rts/gmp/mpn/sparc32/mul_1.asm
new file mode 100644
index 0000000000..e5fedeabaa
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/mul_1.asm
@@ -0,0 +1,137 @@
+dnl SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne,a L(loop0)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g1,[%o4+%o2]
+
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2 C g2 = S1_LIMB iff S2_LIMB < 0, else 0
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne,a L(loop)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/sparc32/rshift.asm b/rts/gmp/mpn/sparc32/rshift.asm
new file mode 100644
index 0000000000..9187dbaa6f
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/rshift.asm
@@ -0,0 +1,93 @@
+dnl SPARC mpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr %o0
+C src_ptr %o1
+C size %o2
+C cnt %o3
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ld [%o1],%g2 C load first limb
+ sub %g0,%o3,%o5 C negate shift count
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 C number of limbs in first loop
+ sll %g2,%o5,%g1 C compute function result
+ be L(0) C if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 C adjust count for main loop
+
+L(loop0):
+ ld [%o1+4],%g3
+ add %o0,4,%o0
+ add %o1,4,%o1
+ addcc %g4,-1,%g4
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0-4]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1+4],%g3
+ add %o0,16,%o0
+ addcc %o2,-4,%o2
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+
+ ld [%o1+8],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-16]
+ sll %g2,%o5,%g1
+
+ ld [%o1+12],%g3
+ srl %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0-12]
+ sll %g3,%o5,%g1
+
+ ld [%o1+16],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-8]
+ sll %g2,%o5,%g1
+
+ add %o1,16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0-4]
+
+L(end): srl %g2,%o3,%g2
+ st %g2,[%o0-0]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/sparc32/sub_n.asm b/rts/gmp/mpn/sparc32/sub_n.asm
new file mode 100644
index 0000000000..071909a1b6
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/sub_n.asm
@@ -0,0 +1,326 @@
+dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) C branch if alignment differs
+ nop
+C ** V1a **
+ andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc n,-10,n
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1):
+ addcc n,8-2,n
+ blt L(end1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1):
+ subxcc %g4,%g2,%o4
+ subxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+C ** V1b **
+ andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1b) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s1_ptr
+ ld [s2_ptr],%g4
+ add s2_ptr,4,s2_ptr
+ ld [s1_ptr],%g2
+ add s1_ptr,4,s1_ptr
+ add n,-1,n
+ subcc %g2,%g4,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1b): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s2_ptr+0],%g4
+ addcc n,-10,n
+ ld [s2_ptr+4],%g1
+ ldd [s1_ptr+0],%g2
+ blt L(fin1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+16],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+20],%g1
+ ldd [s1_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+24],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+28],%g1
+ ldd [s1_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+32],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+36],%g1
+ ldd [s1_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1b)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1b):
+ addcc n,8-2,n
+ blt L(end1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1b)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1b):
+ subxcc %g2,%g4,%o4
+ subxcc %g3,%g1,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s2_ptr+8],%g4
+ ld [s1_ptr+8],%g2
+ subxcc %g2,%g4,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1b):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+C ** V2 **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ. Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2): cmp n,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
+ be L(v2) C if no, branch
+ nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ blt L(fin2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin2):
+ addcc n,8-2,n
+ blt L(end2)
+ subcc %g0,%o4,%g0 C restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 C restore cy
+L(end2):
+ andcc n,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/sparc32/submul_1.asm b/rts/gmp/mpn/sparc32/submul_1.asm
new file mode 100644
index 0000000000..12abd844ce
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/submul_1.asm
@@ -0,0 +1,146 @@
+dnl SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ subcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ subcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/sparc32/udiv_fp.asm b/rts/gmp/mpn/sparc32/udiv_fp.asm
new file mode 100644
index 0000000000..e340e147d2
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/udiv_fp.asm
@@ -0,0 +1,158 @@
+dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl This is for v7 CPUs with a floating-point unit.
+
+dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+
+ifdef(`PIC',
+` TEXT
+L(getpc):
+ retl
+ nop')
+
+ TEXT
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc): call L(getpc) C put address of this insn in %o7
+ ldd [%o7+L(C0)-L(pc)],%f8',
+` sethi %hi(L(C0)),%o7
+ ldd [%o7+%lo(L(C0))],%f8')
+
+ fitod %f10,%f4
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+
+ifdef(`PIC',
+` ldd [%o7+L(C1)-L(pc)],%f4',
+` sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4')
+
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ wr %g0,%i4,%y
+ sra %i3,31,%g2
+ and %i4,%g2,%g2
+ andcc %g0,0,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,0,%g1
+ add %g1,%g2,%i0
+ rd %y,%g3
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/rts/gmp/mpn/sparc32/udiv_nfp.asm b/rts/gmp/mpn/sparc32/udiv_nfp.asm
new file mode 100644
index 0000000000..ae19f4c6e9
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/udiv_nfp.asm
@@ -0,0 +1,193 @@
+dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl This is for v7 CPUs without a floating-point unit.
+
+dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr o0
+C n1 o1
+C n0 o2
+C d o3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+ tst %o3
+ bneg L(largedivisor)
+ mov 8,%g1
+
+ b L(p1)
+ addxcc %o2,%o2,%o2
+
+L(plop):
+ bcc L(n1)
+ addxcc %o2,%o2,%o2
+L(p1): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n2)
+ addxcc %o2,%o2,%o2
+L(p2): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n3)
+ addxcc %o2,%o2,%o2
+L(p3): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n4)
+ addxcc %o2,%o2,%o2
+L(p4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(plop)
+ subcc %o1,%o3,%o4
+ bcc L(n5)
+ addxcc %o2,%o2,%o2
+L(p5): st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(nlop):
+ bcc L(p1)
+ addxcc %o2,%o2,%o2
+L(n1): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p2)
+ addxcc %o2,%o2,%o2
+L(n2): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p3)
+ addxcc %o2,%o2,%o2
+L(n3): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p4)
+ addxcc %o2,%o2,%o2
+L(n4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(nlop)
+ subcc %o4,%o3,%o1
+ bcc L(p5)
+ addxcc %o2,%o2,%o2
+L(n5): st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(largedivisor):
+ and %o2,1,%o5 C %o5 = n0 & 1
+
+ srl %o2,1,%o2
+ sll %o1,31,%g2
+ or %g2,%o2,%o2 C %o2 = lo(n1n0 >> 1)
+ srl %o1,1,%o1 C %o1 = hi(n1n0 >> 1)
+
+ and %o3,1,%g2
+ srl %o3,1,%g3 C %g3 = floor(d / 2)
+ add %g3,%g2,%g3 C %g3 = ceil(d / 2)
+
+ b L(Lp1)
+ addxcc %o2,%o2,%o2
+
+L(Lplop):
+ bcc L(Ln1)
+ addxcc %o2,%o2,%o2
+L(Lp1): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln2)
+ addxcc %o2,%o2,%o2
+L(Lp2): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln3)
+ addxcc %o2,%o2,%o2
+L(Lp3): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln4)
+ addxcc %o2,%o2,%o2
+L(Lp4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(Lplop)
+ subcc %o1,%g3,%o4
+ bcc L(Ln5)
+ addxcc %o2,%o2,%o2
+L(Lp5): add %o1,%o1,%o1 C << 1
+ tst %g2
+ bne L(oddp)
+ add %o5,%o1,%o1
+ st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(Lnlop):
+ bcc L(Lp1)
+ addxcc %o2,%o2,%o2
+L(Ln1): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp2)
+ addxcc %o2,%o2,%o2
+L(Ln2): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp3)
+ addxcc %o2,%o2,%o2
+L(Ln3): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp4)
+ addxcc %o2,%o2,%o2
+L(Ln4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(Lnlop)
+ subcc %o4,%g3,%o1
+ bcc L(Lp5)
+ addxcc %o2,%o2,%o2
+L(Ln5): add %o4,%o4,%o4 C << 1
+ tst %g2
+ bne L(oddn)
+ add %o5,%o4,%o4
+ st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(oddp):
+ xnor %g0,%o2,%o2
+ C q' in %o2. r' in %o1
+ addcc %o1,%o2,%o1
+ bcc L(Lp6)
+ addx %o2,0,%o2
+ sub %o1,%o3,%o1
+L(Lp6): subcc %o1,%o3,%g0
+ bcs L(Lp7)
+ subx %o2,-1,%o2
+ sub %o1,%o3,%o1
+L(Lp7): st %o1,[%o0]
+ retl
+ mov %o2,%o0
+
+L(oddn):
+ xnor %g0,%o2,%o2
+ C q' in %o2. r' in %o4
+ addcc %o4,%o2,%o4
+ bcc L(Ln6)
+ addx %o2,0,%o2
+ sub %o4,%o3,%o4
+L(Ln6): subcc %o4,%o3,%g0
+ bcs L(Ln7)
+ subx %o2,-1,%o2
+ sub %o4,%o3,%o4
+L(Ln7): st %o4,[%o0]
+ retl
+ mov %o2,%o0
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/rts/gmp/mpn/sparc32/umul.asm b/rts/gmp/mpn/sparc32/umul.asm
new file mode 100644
index 0000000000..efa56851d6
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/umul.asm
@@ -0,0 +1,68 @@
+dnl SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ wr %g0,%o1,%y
+ sra %o2,31,%g2 C Don't move this insn
+ and %o1,%g2,%g2 C Don't move this insn
+ andcc %g0,0,%g1 C Don't move this insn
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,0,%g1
+ rd %y,%g3
+ st %g3,[%o0]
+ retl
+ add %g1,%g2,%o0
+EPILOGUE(mpn_umul_ppmm)
diff --git a/rts/gmp/mpn/sparc32/v8/addmul_1.asm b/rts/gmp/mpn/sparc32/v8/addmul_1.asm
new file mode 100644
index 0000000000..da44644b51
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/addmul_1.asm
@@ -0,0 +1,122 @@
+dnl SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ orcc %g0,%g0,%g2
+ ld [%o1+0],%o4 C 1
+
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+ifdef(`PIC',
+` mov %o7,%g4 C Save return address register
+0: call 1f
+ add %o7,L(1)-0b,%g3
+1: mov %g4,%o7 C Restore return address register
+',
+` sethi %hi(L(1)),%g3
+ or %g3,%lo(L(1)),%g3
+')
+ jmp %g3+%g1
+ nop
+L(1):
+L(L00): add %o0,-4,%o0
+ b L(loop00) C 4, 8, 12, ...
+ add %o1,-4,%o1
+ nop
+L(L01): b L(loop01) C 1, 5, 9, ...
+ nop
+ nop
+ nop
+L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
+ b L(loop10)
+ add %o1,4,%o1
+ nop
+L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+ b L(loop11)
+ add %o1,-8,%o1
+ nop
+
+L(loop):
+ addcc %g3,%g2,%g3 C 1
+ ld [%o1+4],%o4 C 2
+ rd %y,%g2 C 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] C 1
+L(loop00):
+ umul %o4,%o3,%g3 C 2
+ ld [%o0+4],%g1 C 2
+ addxcc %g3,%g2,%g3 C 2
+ ld [%o1+8],%o4 C 3
+ rd %y,%g2 C 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] C 2
+L(loop11):
+ umul %o4,%o3,%g3 C 3
+ addxcc %g3,%g2,%g3 C 3
+ ld [%o1+12],%o4 C 4
+ rd %y,%g2 C 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] C 3
+L(loop10):
+ umul %o4,%o3,%g3 C 4
+ addxcc %g3,%g2,%g3 C 4
+ ld [%o1+0],%o4 C 1
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] C 4
+ add %o0,16,%o0
+ addx %g0,%g2,%g2
+L(loop01):
+ addcc %o2,-4,%o2
+ bg L(loop)
+ umul %o4,%o3,%g3 C 1
+
+ addcc %g3,%g2,%g3 C 4
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] C 4
+ addx %g0,%g2,%o0
+
+ retl
+ nop
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/sparc32/v8/mul_1.asm b/rts/gmp/mpn/sparc32/v8/mul_1.asm
new file mode 100644
index 0000000000..801247553a
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/mul_1.asm
@@ -0,0 +1,103 @@
+dnl SPARC v8 mpn_mul_1 -- Multiply a limb vector with a single limb and
+dnl store the product in a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+ifdef(`PIC',
+` mov %o7,%g4 C Save return address register
+0: call 1f
+ add %o7,L(1)-0b,%g3
+1: mov %g4,%o7 C Restore return address register
+',
+` sethi %hi(L(1)),%g3
+ or %g3,%lo(L(1)),%g3
+')
+ jmp %g3+%g1
+ ld [%o1+0],%o4 C 1
+L(1):
+L(L00): add %o0,-4,%o0
+ add %o1,-4,%o1
+ b L(loop00) C 4, 8, 12, ...
+ orcc %g0,%g0,%g2
+L(L01): b L(loop01) C 1, 5, 9, ...
+ orcc %g0,%g0,%g2
+ nop
+ nop
+L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
+ add %o1,4,%o1
+ b L(loop10)
+ orcc %g0,%g0,%g2
+ nop
+L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+ add %o1,-8,%o1
+ b L(loop11)
+ orcc %g0,%g0,%g2
+
+L(loop):
+ addcc %g3,%g2,%g3 C 1
+ ld [%o1+4],%o4 C 2
+ st %g3,[%o0+0] C 1
+ rd %y,%g2 C 1
+L(loop00):
+ umul %o4,%o3,%g3 C 2
+ addxcc %g3,%g2,%g3 C 2
+ ld [%o1+8],%o4 C 3
+ st %g3,[%o0+4] C 2
+ rd %y,%g2 C 2
+L(loop11):
+ umul %o4,%o3,%g3 C 3
+ addxcc %g3,%g2,%g3 C 3
+ ld [%o1+12],%o4 C 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] C 3
+ rd %y,%g2 C 3
+L(loop10):
+ umul %o4,%o3,%g3 C 4
+ addxcc %g3,%g2,%g3 C 4
+ ld [%o1+0],%o4 C 1
+ st %g3,[%o0+12] C 4
+ add %o0,16,%o0
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+L(loop01):
+ addcc %o2,-4,%o2
+ bg L(loop)
+ umul %o4,%o3,%g3 C 1
+
+ addcc %g3,%g2,%g3 C 4
+ st %g3,[%o0+0] C 4
+ rd %y,%g2 C 4
+
+ retl
+ addx %g0,%g2,%o0
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/sparc32/v8/submul_1.asm b/rts/gmp/mpn/sparc32/v8/submul_1.asm
new file mode 100644
index 0000000000..9ed132f4c1
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/submul_1.asm
@@ -0,0 +1,58 @@
+dnl SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ sub %g0,%o2,%o2 C negate ...
+ sll %o2,2,%o2 C ... and scale size
+ sub %o1,%o2,%o1 C o1 is offset s1_ptr
+ sub %o0,%o2,%g1 C g1 is offset res_ptr
+
+ mov 0,%o0 C clear cy_limb
+
+L(loop):
+ ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+ addcc %o2,4,%o2
+ bne L(loop)
+ nop
+
+ retl
+ nop
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm
new file mode 100644
index 0000000000..0d5e8d415d
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm
@@ -0,0 +1,122 @@
+dnl SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h.
+dnl This is for SuperSPARC only, to compensate for its semi-functional
+dnl udiv instruction.
+
+dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+
+ifdef(`PIC',
+` TEXT
+L(getpc):
+ retl
+ nop')
+
+ TEXT
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc): call L(getpc) C put address of this insn in %o7
+ ldd [%o7+L(C0)-L(pc)],%f8',
+` sethi %hi(L(C0)),%o7
+ ldd [%o7+%lo(L(C0))],%f8')
+
+ fitod %f10,%f4
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+
+ifdef(`PIC',
+` ldd [%o7+L(C1)-L(pc)],%f4',
+` sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4')
+
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ umul %i3,%i4,%g3
+ rd %y,%i0
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/rts/gmp/mpn/sparc32/v8/umul.asm b/rts/gmp/mpn/sparc32/v8/umul.asm
new file mode 100644
index 0000000000..ae8f692a0a
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/umul.asm
@@ -0,0 +1,31 @@
+dnl SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ umul %o1,%o2,%g2
+ st %g2,[%o0]
+ retl
+ rd %y,%o0
+EPILOGUE(mpn_umul_ppmm)
diff --git a/rts/gmp/mpn/sparc32/v9/README b/rts/gmp/mpn/sparc32/v9/README
new file mode 100644
index 0000000000..9b39713271
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/README
@@ -0,0 +1,4 @@
+Code for SPARC processors implementing version 9 of the SPARC architecture.
+This code is for systems that doesn't preserve the full 64-bit contents of
+integer register at context switch. For other systems (such as Solaris 7 or
+later) use the code in ../../sparc64.
diff --git a/rts/gmp/mpn/sparc32/v9/addmul_1.asm b/rts/gmp/mpn/sparc32/v9/addmul_1.asm
new file mode 100644
index 0000000000..c1762cc41f
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/addmul_1.asm
@@ -0,0 +1,288 @@
+dnl SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+PROLOGUE(mpn_addmul_1)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10')
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+C BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 C res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+L(ret): add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ st %g4,[%i0-4]
+
+ ret
+ restore %g0,%g3,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/sparc32/v9/gmp-mparam.h b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h
new file mode 100644
index 0000000000..f946b900f0
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h
@@ -0,0 +1,69 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+/* These values are for UltraSPARC I, II, and IIi. It is bogus that
+ this file lives in v9, but that will do for now. */
+
+/* Variations in addmul_1 speed make the multiply and square thresholds
+ doubtful. TOOM3_SQR_THRESHOLD had to be estimated here. */
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 30
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 200
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 59
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 500
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 107
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 146
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 29
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 3
+#endif
diff --git a/rts/gmp/mpn/sparc32/v9/mul_1.asm b/rts/gmp/mpn/sparc32/v9/mul_1.asm
new file mode 100644
index 0000000000..f8f0fdd8c2
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/mul_1.asm
@@ -0,0 +1,267 @@
+dnl SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and
+dnl store the result in a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+PROLOGUE(mpn_mul_1)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10')
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+C BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 C res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+L(ret): add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ st %g4,[%i0-4]
+
+ ret
+ restore %g0,%g3,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/sparc32/v9/submul_1.asm b/rts/gmp/mpn/sparc32/v9/submul_1.asm
new file mode 100644
index 0000000000..6195ea88ea
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/submul_1.asm
@@ -0,0 +1,291 @@
+dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+PROLOGUE(mpn_submul_1)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10')
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+C BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ addx %g3,0,%g3
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 C res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ addx %g3,0,%g3
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+L(ret): add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ srlx %g4,32,%g3
+ st %l2,[%i0-4]
+
+ addx %g3,%g0,%g3
+ ret
+ restore %g0,%g3,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/sparc64/README b/rts/gmp/mpn/sparc64/README
new file mode 100644
index 0000000000..6923a133f3
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/README
@@ -0,0 +1,48 @@
+This directory contains mpn functions for 64-bit V9 SPARC
+
+RELEVANT OPTIMIZATION ISSUES
+
+The Ultra I/II pipeline executes up to two simple integer arithmetic operations
+per cycle. The 64-bit integer multiply instruction mulx takes from 5 cycles to
+35 cycles, depending on the position of the most significant bit of the 1st
+source operand. It cannot overlap with other instructions. For our use of
+mulx, it will take from 5 to 20 cycles.
+
+Integer conditional move instructions cannot dual-issue with other integer
+instructions. No conditional move can issue 1-5 cycles after a load. (Or
+something such bizzare.)
+
+Integer branches can issue with two integer arithmetic instructions. Likewise
+for integer loads. Four instructions may issue (arith, arith, ld/st, branch)
+but only if the branch is last.
+
+(The V9 architecture manual recommends that the 2nd operand of a multiply
+instruction be the smaller one. For UltraSPARC, they got things backwards and
+optimize for the wrong operand! Really helpful in the light of that multiply
+is incredibly slow on these CPUs!)
+
+STATUS
+
+There is new code in ~/prec/gmp-remote/sparc64. Not tested or completed, but
+the pipelines are worked out. Here are the timings:
+
+* lshift, rshift: The code is well-optimized and runs at 2.0 cycles/limb.
+
+* add_n, sub_n: add3.s currently runs at 6 cycles/limb. We use a bizarre
+ scheme of compares and branches (with some nops and fnops to align things)
+ and carefully stay away from the instructions intended for this application
+ (i.e., movcs and movcc).
+
+ Using movcc/movcs, even with deep unrolling, seems to get down to 7
+ cycles/limb.
+
+ The most promising approach is to split operands in 32-bit pieces using
+ srlx, then use two addccc, and finally compile the results with sllx+or.
+ The result could run at 5 cycles/limb, I think. It might be possible to
+ do without unrolling, or with minimal unrolling.
+
+* addmul_1/submul_1: Should optimize for when scalar operand < 2^32.
+* addmul_1/submul_1: Since mulx is horrendously slow on UltraSPARC I/II,
+ Karatsuba's method should save up to 16 cycles (i.e. > 20%).
+* mul_1 (and possibly the other multiply functions): Handle carry in the
+ same tricky way as add_n,sub_n.
diff --git a/rts/gmp/mpn/sparc64/add_n.asm b/rts/gmp/mpn/sparc64/add_n.asm
new file mode 100644
index 0000000000..72b3895a5b
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/add_n.asm
@@ -0,0 +1,172 @@
+! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_add_n)
+
+! 12 mem ops >= 12 cycles
+! 8 shift insn >= 8 cycles
+! 8 addccc, executing alone, +8 cycles
+! Unrolling not mandatory...perhaps 2-way is best?
+! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
+! All in all, it runs at 5 cycles/limb
+
+ save %sp,-160,%sp
+
+ addcc %g0,%g0,%g0
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(there)
+ nop
+
+ ldx [%i1+0],%l0
+ ldx [%i2+0],%l4
+ ldx [%i1+8],%l1
+ ldx [%i2+8],%l5
+ ldx [%i1+16],%l2
+ ldx [%i2+16],%l6
+ ldx [%i1+24],%l3
+ ldx [%i2+24],%l7
+ add %i1,32,%i1
+ add %i2,32,%i2
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(skip)
+ nop
+ b L(loop1) ! jump instead of executing many NOPs
+ nop
+ ALIGN(32)
+!--------- Start main loop ---------
+L(loop1):
+ addccc %l0,%l4,%g1
+!-
+ srlx %l0,32,%o0
+ ldx [%i1+0],%l0
+!-
+ srlx %l4,32,%o4
+ ldx [%i2+0],%l4
+!-
+ addccc %o0,%o4,%g0
+!-
+ addccc %l1,%l5,%g2
+!-
+ srlx %l1,32,%o1
+ ldx [%i1+8],%l1
+!-
+ srlx %l5,32,%o5
+ ldx [%i2+8],%l5
+!-
+ addccc %o1,%o5,%g0
+!-
+ addccc %l2,%l6,%g3
+!-
+ srlx %l2,32,%o2
+ ldx [%i1+16],%l2
+!-
+ srlx %l6,32,%g5 ! asymmetry
+ ldx [%i2+16],%l6
+!-
+ addccc %o2,%g5,%g0
+!-
+ addccc %l3,%l7,%g4
+!-
+ srlx %l3,32,%o3
+ ldx [%i1+24],%l3
+ add %i1,32,%i1
+!-
+ srlx %l7,32,%o7
+ ldx [%i2+24],%l7
+ add %i2,32,%i2
+!-
+ addccc %o3,%o7,%g0
+!-
+ stx %g1,[%i0+0]
+!-
+ stx %g2,[%i0+8]
+!-
+ stx %g3,[%i0+16]
+ add %i3,-4,%i3
+!-
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+ brgez,pt %i3,L(loop1)
+ nop
+!--------- End main loop ---------
+L(skip):
+ addccc %l0,%l4,%g1
+ srlx %l0,32,%o0
+ srlx %l4,32,%o4
+ addccc %o0,%o4,%g0
+ addccc %l1,%l5,%g2
+ srlx %l1,32,%o1
+ srlx %l5,32,%o5
+ addccc %o1,%o5,%g0
+ addccc %l2,%l6,%g3
+ srlx %l2,32,%o2
+ srlx %l6,32,%g5 ! asymmetry
+ addccc %o2,%g5,%g0
+ addccc %l3,%l7,%g4
+ srlx %l3,32,%o3
+ srlx %l7,32,%o7
+ addccc %o3,%o7,%g0
+ stx %g1,[%i0+0]
+ stx %g2,[%i0+8]
+ stx %g3,[%i0+16]
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+L(there):
+ add %i3,4,%i3
+ brz,pt %i3,L(end)
+ nop
+
+L(loop2):
+ ldx [%i1+0],%l0
+ add %i1,8,%i1
+ ldx [%i2+0],%l4
+ add %i2,8,%i2
+ srlx %l0,32,%g2
+ srlx %l4,32,%g3
+ addccc %l0,%l4,%g1
+ addccc %g2,%g3,%g0
+ stx %g1,[%i0+0]
+ add %i0,8,%i0
+ add %i3,-1,%i3
+ brgz,pt %i3,L(loop2)
+ nop
+
+L(end): addc %g0,%g0,%i0
+ ret
+ restore
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/sparc64/addmul1h.asm b/rts/gmp/mpn/sparc64/addmul1h.asm
new file mode 100644
index 0000000000..96cb5f7369
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/addmul1h.asm
@@ -0,0 +1,203 @@
+dnl SPARC 64-bit addmull/addmulu -- Helper for mpn_addmul_1 and mpn_mul_1.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+ifdef(`LOWPART',
+`addmull:',
+`addmulu:')
+ save %sp,-256,%sp
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1+4],%f11
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,E(end2)
+ std %f12,[%fp-17]
+
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end3)
+ std %f12,[%fp-33]
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,E(end4)
+ std %f12,[%fp-17]
+
+ b,a E(loop)
+ nop C nop is cheap to nullify
+
+ ALIGN(16)
+C BEGIN LOOP
+E(loop):
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ sub %i2,2,%i2
+ add %i0,4,%i0 C res_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ std %f12,[%fp-17]
+ brnz,pt %i2,E(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+E(loope):
+E(end4):
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ b,a E(yyy)
+
+E(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ifdef(`LOWPART',
+` ld [%i0+DHI],%g5')
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ ldx [%fp-33],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ifdef(`LOWPART',
+` add %g5,%g1,%g1') C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ifdef(`LOWPART',
+` st %g4,[%i0-4+DHI]
+ srlx %g4,32,%g4')
+
+ ret
+ restore %g0,%g4,%o0 C sideeffect: put cy in retreg
+ifdef(`LOWPART',
+`EPILOGUE(addmull)',
+`EPILOGUE(addmulu)')
diff --git a/rts/gmp/mpn/sparc64/addmul_1.asm b/rts/gmp/mpn/sparc64/addmul_1.asm
new file mode 100644
index 0000000000..c3f04cea6a
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/addmul_1.asm
@@ -0,0 +1,114 @@
+dnl SPARC 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+PROLOGUE(mpn_addmul_1)
+ save %sp,-256,%sp
+
+C We store 0.0 in f10 and keep it invariant accross thw two
+C function calls below. Note that this is not ABI conformant,
+C but since the functions are local, that's acceptable.
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hh(L(noll)),%g2
+ sethi %lm(L(noll)),%g1
+ or %g2,%hm(L(noll)),%g2
+ or %g1,%lo(L(noll)),%g1
+ sllx %g2,32,%g2
+ ld [%g1+%g2],%f10')
+
+ sub %i1,%i0,%g1
+ srlx %g1,3,%g1
+ cmp %g1,%i2
+ bcc,pt %xcc,L(nooverlap)
+ nop
+
+ sllx %i2,3,%g2 C compute stack allocation byte count
+ add %g2,15,%o0
+ and %o0,-16,%o0
+ sub %sp,%o0,%sp
+ add %sp,2223,%o0
+
+ mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp
+ call mpn_copyi
+ mov %i2,%o2 C copy n to mpn_copyi's count parameter
+
+ add %sp,2223,%i1
+
+L(nooverlap):
+C First multiply-add with low 32 bits of s2_limb
+ mov %i0,%o0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call addmull
+ srl %i3,0,%o3
+
+ mov %o0,%l0 C keep carry-out from accmull
+
+C Now multiply-add with high 32 bits of s2_limb, unless it is zero.
+ srlx %i3,32,%o3
+ brz,a,pn %o3,L(small)
+ mov %o0,%i0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call addmulu
+ add %i0,4,%o0
+
+ add %l0,%o0,%i0
+L(small):
+ ret
+ restore %g0,%g0,%g0
+EPILOGUE(mpn_addmul_1)
+
+C Put a zero in the text segment to allow us to t the address
+C quickly when compiling for PIC
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+define(`LO',`(+4)')
+define(`HI',`(-4)')
+
+define(`DLO',`(+4)')
+define(`DHI',`(-4)')
+define(`LOWPART')
+define(`E',`L(l.$1)')
+include_mpn(`sparc64/addmul1h.asm')
+
+define(`DLO',`(-4)')
+define(`DHI',`(+4)')
+undefine(`LOWPART')
+define(`E',`L(u.$1)')
+include_mpn(`sparc64/addmul1h.asm')
diff --git a/rts/gmp/mpn/sparc64/copyi.asm b/rts/gmp/mpn/sparc64/copyi.asm
new file mode 100644
index 0000000000..d9957e3c90
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/copyi.asm
@@ -0,0 +1,79 @@
+! SPARC v9 __gmpn_copy -- Copy a limb vector.
+
+! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rptr %o0
+! sptr %o1
+! n %o2
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_copyi)
+ add %o2,-8,%o2
+ brlz,pn %o2,L(skip)
+ nop
+ b,a L(loop1)
+ nop
+
+ ALIGN(16)
+L(loop1):
+ ldx [%o1+0],%g1
+ ldx [%o1+8],%g2
+ ldx [%o1+16],%g3
+ ldx [%o1+24],%g4
+ ldx [%o1+32],%g5
+ ldx [%o1+40],%o3
+ ldx [%o1+48],%o4
+ ldx [%o1+56],%o5
+ add %o1,64,%o1
+ stx %g1,[%o0+0]
+ stx %g2,[%o0+8]
+ stx %g3,[%o0+16]
+ stx %g4,[%o0+24]
+ stx %g5,[%o0+32]
+ stx %o3,[%o0+40]
+ stx %o4,[%o0+48]
+ stx %o5,[%o0+56]
+ add %o2,-8,%o2
+ brgez,pt %o2,L(loop1)
+ add %o0,64,%o0
+
+L(skip):
+ add %o2,8,%o2
+ brz,pt %o2,L(end)
+ nop
+
+L(loop2):
+ ldx [%o1],%g1
+ add %o1,8,%o1
+ add %o2,-1,%o2
+ stx %g1,[%o0]
+ add %o0,8,%o0
+ brgz,pt %o2,L(loop2)
+ nop
+
+L(end): retl
+ nop
+EPILOGUE(mpn_copyi)
diff --git a/rts/gmp/mpn/sparc64/gmp-mparam.h b/rts/gmp/mpn/sparc64/gmp-mparam.h
new file mode 100644
index 0000000000..74f61661c1
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/gmp-mparam.h
@@ -0,0 +1,88 @@
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Tell the toom3 multiply implementation to call low-level mpn
+ functions instead of open-coding operations in C. */
+#define USE_MORE_MPN 1
+
+
+/* Run on sun workshop cc. */
+/* Generated by tuneup.c, 2000-07-30. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 12
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 95
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 33
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 125
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 27
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 107
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 12
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 199
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 304, 608, 1344, 2304, 7168, 20480, 49152, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 320
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 1664
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 304, 608, 1344, 2816, 7168, 20480, 49152, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 320
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 1664
+#endif
diff --git a/rts/gmp/mpn/sparc64/lshift.asm b/rts/gmp/mpn/sparc64/lshift.asm
new file mode 100644
index 0000000000..2d2edc50a7
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/lshift.asm
@@ -0,0 +1,97 @@
+! SPARC v9 __gmpn_lshift --
+
+! Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_lshift)
+ sllx %o2,3,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ldx [%o1-8],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ srlx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop
+ mov %g1,%g5
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ldx [%o1-16],%g3
+ add %o0,-8,%o0
+ add %o1,-8,%o1
+ add %g4,-1,%g4
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,L(loop0)
+ stx %o4,[%o0+0]
+
+L(0): brz,pn %o2,L(end)
+ nop
+
+L(loop1):
+ ldx [%o1-16],%g3
+ add %o0,-32,%o0
+ add %o2,-4,%o2
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-24],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+24]
+ srlx %g2,%o5,%g1
+
+ ldx [%o1-32],%g3
+ sllx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0+16]
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-40],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+8]
+ srlx %g2,%o5,%g1
+
+ add %o1,-32,%o1
+ or %g4,%g1,%g4
+ brnz,pt %o2,L(loop1)
+ stx %g4,[%o0+0]
+
+L(end): sllx %g2,%o3,%g2
+ stx %g2,[%o0-8]
+ retl
+ mov %g5,%o0
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/sparc64/mul_1.asm b/rts/gmp/mpn/sparc64/mul_1.asm
new file mode 100644
index 0000000000..f2f2821d51
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/mul_1.asm
@@ -0,0 +1,113 @@
+dnl SPARC 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and
+dnl store the result to a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+PROLOGUE(mpn_mul_1)
+ save %sp,-256,%sp
+
+C We store 0.0 in f10 and keep it invariant accross thw two
+C function calls below. Note that this is not ABI conformant,
+C but since the functions are local, that's acceptable.
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hh(L(noll)),%g2
+ sethi %lm(L(noll)),%g1
+ or %g2,%hm(L(noll)),%g2
+ or %g1,%lo(L(noll)),%g1
+ sllx %g2,32,%g2
+ ld [%g1+%g2],%f10')
+
+ sub %i1,%i0,%g1
+ srlx %g1,3,%g1
+ cmp %g1,%i2
+ bcc,pt %xcc,L(nooverlap)
+ nop
+
+ sllx %i2,3,%g2 C compute stack allocation byte count
+ add %g2,15,%o0
+ and %o0,-16,%o0
+ sub %sp,%o0,%sp
+ add %sp,2223,%o0
+
+ mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp
+ call mpn_copyi
+ mov %i2,%o2 C copy n to mpn_copyi's count parameter
+
+ add %sp,2223,%i1
+
+L(nooverlap):
+C First multiply-add with low 32 bits of s2_limb
+ mov %i0,%o0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call mull
+ srl %i3,0,%o3
+
+ mov %o0,%l0 C keep carry-out from accmull
+
+C Now multiply-add with high 32 bits of s2_limb, unless it is zero.
+ srlx %i3,32,%o3
+ brz,a,pn %o3,L(small)
+ mov %o0,%i0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call addmulu
+ add %i0,4,%o0
+
+ add %l0,%o0,%i0
+L(small):
+ ret
+ restore %g0,%g0,%g0
+EPILOGUE(mpn_mul_1)
+
+C Put a zero in the text segment to allow us to t the address
+C quickly when compiling for PIC
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+define(`LO',`(+4)')
+define(`HI',`(-4)')
+
+define(`DLO',`(+4)')
+define(`DHI',`(-4)')
+define(`E',`L($1)')
+include_mpn(`sparc64/mul_1h.asm')
+
+define(`DLO',`(-4)')
+define(`DHI',`(+4)')
+undefine(`LOWPART')
+define(`E',`L(u.$1)')
+include_mpn(`sparc64/addmul1h.asm')
diff --git a/rts/gmp/mpn/sparc64/mul_1h.asm b/rts/gmp/mpn/sparc64/mul_1h.asm
new file mode 100644
index 0000000000..5078c01c3f
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/mul_1h.asm
@@ -0,0 +1,183 @@
+dnl SPARC 64-bit mull -- Helper for mpn_mul_1.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+mull:
+ save %sp,-256,%sp
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1+4],%f11
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,E(end2)
+ std %f12,[%fp-17]
+
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end3)
+ std %f12,[%fp-33]
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,E(end4)
+ std %f12,[%fp-17]
+
+ b,a E(loop)
+ nop C nop is cheap to nullify
+
+ ALIGN(16)
+C BEGIN LOOP
+E(loop):
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ sub %i2,2,%i2
+ add %i0,4,%i0 C res_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ std %f12,[%fp-17]
+ brnz,pt %i2,E(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+E(loope):
+E(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ b,a E(yyy)
+
+E(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ ldx [%fp-33],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ st %g4,[%i0-4+DHI]
+ srlx %g4,32,%g4
+
+ ret
+ restore %g0,%g4,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mull)
diff --git a/rts/gmp/mpn/sparc64/rshift.asm b/rts/gmp/mpn/sparc64/rshift.asm
new file mode 100644
index 0000000000..baf7920efb
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/rshift.asm
@@ -0,0 +1,94 @@
+! SPARC v9 __gmpn_rshift --
+
+! Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_rshift)
+ ldx [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ sllx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop
+ mov %g1,%g5
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ldx [%o1+8],%g3
+ add %o0,8,%o0
+ add %o1,8,%o1
+ add %g4,-1,%g4
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,L(loop0)
+ stx %o4,[%o0-8]
+
+L(0): brz,pn %o2,L(end)
+ nop
+
+L(loop1):
+ ldx [%o1+8],%g3
+ add %o0,32,%o0
+ add %o2,-4,%o2
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+16],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-32]
+ sllx %g2,%o5,%g1
+
+ ldx [%o1+24],%g3
+ srlx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0-24]
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+32],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-16]
+ sllx %g2,%o5,%g1
+
+ add %o1,32,%o1
+ or %g4,%g1,%g4
+ brnz %o2,L(loop1)
+ stx %g4,[%o0-8]
+
+L(end): srlx %g2,%o3,%g2
+ stx %g2,[%o0-0]
+ retl
+ mov %g5,%o0
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/sparc64/sub_n.asm b/rts/gmp/mpn/sparc64/sub_n.asm
new file mode 100644
index 0000000000..61547138e0
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/sub_n.asm
@@ -0,0 +1,172 @@
+! SPARC v9 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_sub_n)
+
+! 12 mem ops >= 12 cycles
+! 8 shift insn >= 8 cycles
+! 8 addccc, executing alone, +8 cycles
+! Unrolling not mandatory...perhaps 2-way is best?
+! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
+! All in all, it runs at 5 cycles/limb
+
+ save %sp,-160,%sp
+
+ addcc %g0,%g0,%g0
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(there)
+ nop
+
+ ldx [%i1+0],%l0
+ ldx [%i2+0],%l4
+ ldx [%i1+8],%l1
+ ldx [%i2+8],%l5
+ ldx [%i1+16],%l2
+ ldx [%i2+16],%l6
+ ldx [%i1+24],%l3
+ ldx [%i2+24],%l7
+ add %i1,32,%i1
+ add %i2,32,%i2
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(skip)
+ nop
+ b L(loop1) ! jump instead of executing many NOPs
+ nop
+ ALIGN(32)
+!--------- Start main loop ---------
+L(loop1):
+ subccc %l0,%l4,%g1
+!-
+ srlx %l0,32,%o0
+ ldx [%i1+0],%l0
+!-
+ srlx %l4,32,%o4
+ ldx [%i2+0],%l4
+!-
+ subccc %o0,%o4,%g0
+!-
+ subccc %l1,%l5,%g2
+!-
+ srlx %l1,32,%o1
+ ldx [%i1+8],%l1
+!-
+ srlx %l5,32,%o5
+ ldx [%i2+8],%l5
+!-
+ subccc %o1,%o5,%g0
+!-
+ subccc %l2,%l6,%g3
+!-
+ srlx %l2,32,%o2
+ ldx [%i1+16],%l2
+!-
+ srlx %l6,32,%g5 ! asymmetry
+ ldx [%i2+16],%l6
+!-
+ subccc %o2,%g5,%g0
+!-
+ subccc %l3,%l7,%g4
+!-
+ srlx %l3,32,%o3
+ ldx [%i1+24],%l3
+ add %i1,32,%i1
+!-
+ srlx %l7,32,%o7
+ ldx [%i2+24],%l7
+ add %i2,32,%i2
+!-
+ subccc %o3,%o7,%g0
+!-
+ stx %g1,[%i0+0]
+!-
+ stx %g2,[%i0+8]
+!-
+ stx %g3,[%i0+16]
+ add %i3,-4,%i3
+!-
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+ brgez,pt %i3,L(loop1)
+ nop
+!--------- End main loop ---------
+L(skip):
+ subccc %l0,%l4,%g1
+ srlx %l0,32,%o0
+ srlx %l4,32,%o4
+ subccc %o0,%o4,%g0
+ subccc %l1,%l5,%g2
+ srlx %l1,32,%o1
+ srlx %l5,32,%o5
+ subccc %o1,%o5,%g0
+ subccc %l2,%l6,%g3
+ srlx %l2,32,%o2
+ srlx %l6,32,%g5 ! asymmetry
+ subccc %o2,%g5,%g0
+ subccc %l3,%l7,%g4
+ srlx %l3,32,%o3
+ srlx %l7,32,%o7
+ subccc %o3,%o7,%g0
+ stx %g1,[%i0+0]
+ stx %g2,[%i0+8]
+ stx %g3,[%i0+16]
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+L(there):
+ add %i3,4,%i3
+ brz,pt %i3,L(end)
+ nop
+
+L(loop2):
+ ldx [%i1+0],%l0
+ add %i1,8,%i1
+ ldx [%i2+0],%l4
+ add %i2,8,%i2
+ srlx %l0,32,%g2
+ srlx %l4,32,%g3
+ subccc %l0,%l4,%g1
+ subccc %g2,%g3,%g0
+ stx %g1,[%i0+0]
+ add %i0,8,%i0
+ add %i3,-1,%i3
+ brgz,pt %i3,L(loop2)
+ nop
+
+L(end): addc %g0,%g0,%i0
+ ret
+ restore
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/sparc64/submul1h.asm b/rts/gmp/mpn/sparc64/submul1h.asm
new file mode 100644
index 0000000000..7f51ba59c6
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/submul1h.asm
@@ -0,0 +1,204 @@
+dnl SPARC 64-bit submull/submulu -- Helper for mpn_submul_1 and mpn_mul_1.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+ifdef(`LOWPART',
+`submull:',
+`submulu:')
+ save %sp,-256,%sp
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1+4],%f11
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,E(end2)
+ std %f12,[%fp-17]
+
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end3)
+ std %f12,[%fp-33]
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,E(end4)
+ std %f12,[%fp-17]
+
+ b,a E(loop)
+ nop C nop is cheap to nullify
+
+ ALIGN(16)
+C BEGIN LOOP
+E(loop):
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ sub %i2,2,%i2
+ add %i0,4,%i0 C res_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DHI]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ std %f12,[%fp-17]
+ brnz,pt %i2,E(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+E(loope):
+E(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DHI]
+ b,a E(yyy)
+
+E(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ifdef(`LOWPART',
+` ld [%i0+DHI],%g5')
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ ldx [%fp-33],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DLO]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ifdef(`LOWPART',
+` subxcc %g5,%g4,%l2') C add *res_ptr to p0 (ADD2)
+ifdef(`LOWPART',
+` st %l2,[%i0-4+DHI]
+ srlx %g4,32,%g4')
+
+ addx %g4,0,%g4
+ ret
+ restore %g0,%g4,%o0 C sideeffect: put cy in retreg
+ifdef(`LOWPART',
+`EPILOGUE(submull)',
+`EPILOGUE(submulu)')
diff --git a/rts/gmp/mpn/sparc64/submul_1.asm b/rts/gmp/mpn/sparc64/submul_1.asm
new file mode 100644
index 0000000000..7c6af0a98b
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/submul_1.asm
@@ -0,0 +1,114 @@
+dnl SPARC 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+PROLOGUE(mpn_submul_1)
+ save %sp,-256,%sp
+
+C We store 0.0 in f10 and keep it invariant accross thw two
+C function calls below. Note that this is not ABI conformant,
+C but since the functions are local, that's acceptable.
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hh(L(noll)),%g2
+ sethi %lm(L(noll)),%g1
+ or %g2,%hm(L(noll)),%g2
+ or %g1,%lo(L(noll)),%g1
+ sllx %g2,32,%g2
+ ld [%g1+%g2],%f10')
+
+ sub %i1,%i0,%g1
+ srlx %g1,3,%g1
+ cmp %g1,%i2
+ bcc,pt %xcc,L(nooverlap)
+ nop
+
+ sllx %i2,3,%g2 C compute stack allocation byte count
+ add %g2,15,%o0
+ and %o0,-16,%o0
+ sub %sp,%o0,%sp
+ add %sp,2223,%o0
+
+ mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp
+ call mpn_copyi
+ mov %i2,%o2 C copy n to mpn_copyi's count parameter
+
+ add %sp,2223,%i1
+
+L(nooverlap):
+C First multiply-add with low 32 bits of s2_limb
+ mov %i0,%o0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call submull
+ srl %i3,0,%o3
+
+ mov %o0,%l0 C keep carry-out from accmull
+
+C Now multiply-add with high 32 bits of s2_limb, unless it is zero.
+ srlx %i3,32,%o3
+ brz,a,pn %o3,L(small)
+ mov %o0,%i0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call submulu
+ add %i0,4,%o0
+
+ add %l0,%o0,%i0
+L(small):
+ ret
+ restore %g0,%g0,%g0
+EPILOGUE(mpn_submul_1)
+
+C Put a zero in the text segment to allow us to t the address
+C quickly when compiling for PIC
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+define(`LO',`(+4)')
+define(`HI',`(-4)')
+
+define(`DLO',`(+4)')
+define(`DHI',`(-4)')
+define(`LOWPART')
+define(`E',`L(l.$1)')
+include_mpn(`sparc64/submul1h.asm')
+
+define(`DLO',`(-4)')
+define(`DHI',`(+4)')
+undefine(`LOWPART')
+define(`E',`L(u.$1)')
+include_mpn(`sparc64/submul1h.asm')
diff --git a/rts/gmp/mpn/thumb/add_n.s b/rts/gmp/mpn/thumb/add_n.s
new file mode 100644
index 0000000000..c1eeb6ca87
--- /dev/null
+++ b/rts/gmp/mpn/thumb/add_n.s
@@ -0,0 +1,50 @@
+@ ARM/Thumb __gmpn_add -- Add two limb vectors of the same length > 0 and store
+@ sum in a third limb vector.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+
+@ INPUT PARAMETERS
+@ RES_ptr r0
+@ S1_ptr r1
+@ S2_ptr r2
+@ SIZE r3
+
+@ NOT TESTED CODE
+
+ .text
+ .thumb
+ .align 0
+ .global ___gmpn_add_n
+___gmpn_add_n:
+ push {r4, r5, r6, lr}
+ mov r6, #1 @ init carry save register
+
+Loop: sub r6, #1 @ restore carry (set iff r6 was 0)
+ ldmia r1!, {r4} @ load next limb from S1
+ ldmia r2!, {r5} @ load next limb from S2
+ adc r4, r5
+ stmia r0!, {r4} @ store result limb to RES
+ sbc r6, r6 @ save negated carry
+ sub r3, #1
+ bge Loop @ loop back while remaining count >= 4
+
+ mov r0, r6
+ pop {r4, r5, r6, pc}
diff --git a/rts/gmp/mpn/thumb/sub_n.s b/rts/gmp/mpn/thumb/sub_n.s
new file mode 100644
index 0000000000..53c292375f
--- /dev/null
+++ b/rts/gmp/mpn/thumb/sub_n.s
@@ -0,0 +1,50 @@
+@ ARM/Thumb __gmpn_sub -- Subtract two limb vectors of the same length > 0 and
+@ store difference in a third limb vector.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+
+@ INPUT PARAMETERS
+@ RES_ptr r0
+@ S1_ptr r1
+@ S2_ptr r2
+@ SIZE r3
+
+@ NOT TESTED CODE
+
+ .text
+ .thumb
+ .align 0
+ .global ___gmpn_sub_n
+___gmpn_sub_n:
+ push {r4, r5, r6, lr}
+ mov r6, #1 @ init carry save register
+
+Loop: sub r6, #1 @ restore carry (set iff r6 was 0)
+ ldmia r1!, {r4} @ load next limb from S1
+ ldmia r2!, {r5} @ load next limb from S2
+ sbc r4, r5
+ stmia r0!, {r4} @ store result limb to RES
+ sbc r6, r6 @ save negated carry
+ sub r3, #1
+ bge Loop @ loop back while remaining count >= 4
+
+ mov r0, r6
+ pop {r4, r5, r6, pc}
diff --git a/rts/gmp/mpn/underscore.h b/rts/gmp/mpn/underscore.h
new file mode 100644
index 0000000000..240dae0f63
--- /dev/null
+++ b/rts/gmp/mpn/underscore.h
@@ -0,0 +1,26 @@
+/*
+Copyright (C) 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#if __STDC__
+#define C_SYMBOL_NAME(name) _##name
+#else
+#define C_SYMBOL_NAME(name) _/**/name
+#endif
diff --git a/rts/gmp/mpn/vax/add_n.s b/rts/gmp/mpn/vax/add_n.s
new file mode 100644
index 0000000000..cf4060f521
--- /dev/null
+++ b/rts/gmp/mpn/vax/add_n.s
@@ -0,0 +1,61 @@
+# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# s2_ptr (sp + 12)
+# size (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ .word 0x0
+ movl 16(ap),r0
+ movl 12(ap),r1
+ movl 8(ap),r2
+ movl 4(ap),r3
+ mnegl r0,r5
+ addl2 $3,r0
+ ashl $-2,r0,r0 # unroll loop count
+ bicl2 $-4,r5 # mask out low 2 bits
+ movaq (r5)[r5],r5 # 9x
+ jmp Loop(r5)
+
+Loop: movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ sobgtr r0,Loop
+
+ adwc r0,r0
+ ret
diff --git a/rts/gmp/mpn/vax/addmul_1.s b/rts/gmp/mpn/vax/addmul_1.s
new file mode 100644
index 0000000000..379061dcb7
--- /dev/null
+++ b/rts/gmp/mpn/vax/addmul_1.s
@@ -0,0 +1,126 @@
+# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# size (sp + 12)
+# s2_limb (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+ .word 0xfc0
+ movl 12(ap),r4
+ movl 8(ap),r8
+ movl 4(ap),r9
+ movl 16(ap),r6
+ jlss s2_big
+
+ clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L1
+ clrl r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl (r8)+,r1
+ jlss L1n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc $0,r3
+ addl2 r2,(r9)+
+ adwc $0,r3
+L1: movl (r8)+,r1
+ jlss L1n1
+L1p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc $0,r11
+ addl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+L1n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ addl2 r2,(r9)+
+ adwc $0,r3
+ movl (r8)+,r1
+ jgeq L1p1
+L1n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ addl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+
+s2_big: clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L2
+ clrl r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl (r8)+,r1
+ jlss L2n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r1,r3
+ addl2 r2,(r9)+
+ adwc $0,r3
+L2: movl (r8)+,r1
+ jlss L2n1
+L2p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r1,r11
+ addl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
+
+L2n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ addl2 r2,(r9)+
+ adwc r1,r3
+ movl (r8)+,r1
+ jgeq L2p1
+L2n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ addl2 r10,(r9)+
+ adwc r1,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
diff --git a/rts/gmp/mpn/vax/lshift.s b/rts/gmp/mpn/vax/lshift.s
new file mode 100644
index 0000000000..fd311a9782
--- /dev/null
+++ b/rts/gmp/mpn/vax/lshift.s
@@ -0,0 +1,58 @@
+# VAX __gmpn_lshift -- left shift.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr (sp + 4)
+# sptr (sp + 8)
+# size (sp + 12)
+# cnt (sp + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers
+
+.text
+ .align 1
+.globl ___gmpn_lshift
+___gmpn_lshift:
+ .word 0x1c0
+ movl 4(ap),r7
+ movl 8(ap),r6
+ movl 12(ap),r1
+ movl 16(ap),r8
+
+ moval (r6)[r1],r6
+ moval (r7)[r1],r7
+ clrl r3
+ movl -(r6),r2
+ ashq r8,r2,r4
+ movl r5,r0
+ movl r2,r3
+ decl r1
+ jeql Lend
+
+Loop: movl -(r6),r2
+ ashq r8,r2,r4
+ movl r5,-(r7)
+ movl r2,r3
+ jsobgtr r1,Loop
+
+Lend: movl r4,-4(r7)
+ ret
diff --git a/rts/gmp/mpn/vax/mul_1.s b/rts/gmp/mpn/vax/mul_1.s
new file mode 100644
index 0000000000..708e8ca6ca
--- /dev/null
+++ b/rts/gmp/mpn/vax/mul_1.s
@@ -0,0 +1,123 @@
+# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# size (sp + 12)
+# s2_limb (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ .word 0xfc0
+ movl 12(ap),r4
+ movl 8(ap),r8
+ movl 4(ap),r9
+ movl 16(ap),r6
+ jlss s2_big
+
+# One might want to combine the addl2 and the store below, but that
+# is actually just slower according to my timing tests. (VAX 3600)
+
+ clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L1
+ clrl r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl (r8)+,r1
+ jlss L1n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc $0,r3
+ movl r2,(r9)+
+L1: movl (r8)+,r1
+ jlss L1n1
+L1p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc $0,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+L1n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ movl r2,(r9)+
+ movl (r8)+,r1
+ jgeq L1p1
+L1n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+
+s2_big: clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L2
+ clrl r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl (r8)+,r1
+ jlss L2n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r1,r3
+ movl r2,(r9)+
+L2: movl (r8)+,r1
+ jlss L2n1
+L2p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r1,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
+
+L2n0: emul r1,r6,$0,r2
+ addl2 r1,r3
+ addl2 r11,r2
+ adwc r6,r3
+ movl r2,(r9)+
+ movl (r8)+,r1
+ jgeq L2p1
+L2n1: emul r1,r6,$0,r10
+ addl2 r1,r11
+ addl2 r3,r10
+ adwc r6,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
diff --git a/rts/gmp/mpn/vax/rshift.s b/rts/gmp/mpn/vax/rshift.s
new file mode 100644
index 0000000000..515813208d
--- /dev/null
+++ b/rts/gmp/mpn/vax/rshift.s
@@ -0,0 +1,56 @@
+# VAX __gmpn_rshift -- right shift.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr (sp + 4)
+# sptr (sp + 8)
+# size (sp + 12)
+# cnt (sp + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers
+
+.text
+ .align 1
+.globl ___gmpn_rshift
+___gmpn_rshift:
+ .word 0x1c0
+ movl 4(ap),r7
+ movl 8(ap),r6
+ movl 12(ap),r1
+ movl 16(ap),r8
+
+ movl (r6)+,r2
+ subl3 r8,$32,r8
+ ashl r8,r2,r0
+ decl r1
+ jeql Lend
+
+Loop: movl (r6)+,r3
+ ashq r8,r2,r4
+ movl r5,(r7)+
+ movl r3,r2
+ jsobgtr r1,Loop
+
+Lend: clrl r3
+ ashq r8,r2,r4
+ movl r5,(r7)
+ ret
diff --git a/rts/gmp/mpn/vax/sub_n.s b/rts/gmp/mpn/vax/sub_n.s
new file mode 100644
index 0000000000..eff4b1c044
--- /dev/null
+++ b/rts/gmp/mpn/vax/sub_n.s
@@ -0,0 +1,61 @@
+# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+# difference in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# s2_ptr (sp + 12)
+# size (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ .word 0x0
+ movl 16(ap),r0
+ movl 12(ap),r1
+ movl 8(ap),r2
+ movl 4(ap),r3
+ mnegl r0,r5
+ addl2 $3,r0
+ ashl $-2,r0,r0 # unroll loop count
+ bicl2 $-4,r5 # mask out low 2 bits
+ movaq (r5)[r5],r5 # 9x
+ jmp Loop(r5)
+
+Loop: movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ sobgtr r0,Loop
+
+ adwc r0,r0
+ ret
diff --git a/rts/gmp/mpn/vax/submul_1.s b/rts/gmp/mpn/vax/submul_1.s
new file mode 100644
index 0000000000..be42286935
--- /dev/null
+++ b/rts/gmp/mpn/vax/submul_1.s
@@ -0,0 +1,126 @@
+# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# size (sp + 12)
+# s2_limb (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+ .word 0xfc0
+ movl 12(ap),r4
+ movl 8(ap),r8
+ movl 4(ap),r9
+ movl 16(ap),r6
+ jlss s2_big
+
+ clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L1
+ clrl r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl (r8)+,r1
+ jlss L1n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc $0,r3
+ subl2 r2,(r9)+
+ adwc $0,r3
+L1: movl (r8)+,r1
+ jlss L1n1
+L1p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc $0,r11
+ subl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+L1n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ subl2 r2,(r9)+
+ adwc $0,r3
+ movl (r8)+,r1
+ jgeq L1p1
+L1n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ subl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+
+s2_big: clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L2
+ clrl r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl (r8)+,r1
+ jlss L2n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r1,r3
+ subl2 r2,(r9)+
+ adwc $0,r3
+L2: movl (r8)+,r1
+ jlss L2n1
+L2p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r1,r11
+ subl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
+
+L2n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ subl2 r2,(r9)+
+ adwc r1,r3
+ movl (r8)+,r1
+ jgeq L2p1
+L2n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ subl2 r10,(r9)+
+ adwc r1,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
diff --git a/rts/gmp/mpn/x86/README b/rts/gmp/mpn/x86/README
new file mode 100644
index 0000000000..3507548b8c
--- /dev/null
+++ b/rts/gmp/mpn/x86/README
@@ -0,0 +1,40 @@
+
+ X86 MPN SUBROUTINES
+
+
+This directory contains mpn functions for various 80x86 chips.
+
+
+CODE ORGANIZATION
+
+ x86 i386, i486, generic
+ x86/pentium Intel Pentium (P5, P54)
+ x86/pentium/mmx Intel Pentium with MMX (P55)
+ x86/p6 Intel Pentium Pro
+ x86/p6/mmx Intel Pentium II, III
+ x86/p6/p3mmx Intel Pentium III
+ x86/k6 AMD K6, K6-2, K6-3
+ x86/k6/mmx
+ x86/k6/k62mmx AMD K6-2
+ x86/k7 AMD Athlon
+ x86/k7/mmx
+
+
+The x86 directory is also the main support for P6 at the moment, and
+is something of a blended style, meant to be reasonable on all x86s.
+
+
+
+STATUS
+
+The code is well-optimized for AMD and Intel chips, but not so well
+optimized for Cyrix chips.
+
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+For implementations with slow double shift instructions (SHLD and
+SHRD), it might be better to mimic their operation with SHL+SHR+OR.
+(M2 is likely to benefit from that, but not Pentium due to its slow
+plain SHL and SHR.)
diff --git a/rts/gmp/mpn/x86/README.family b/rts/gmp/mpn/x86/README.family
new file mode 100644
index 0000000000..3bc73f58b0
--- /dev/null
+++ b/rts/gmp/mpn/x86/README.family
@@ -0,0 +1,333 @@
+
+ X86 CPU FAMILY MPN SUBROUTINES
+
+
+This file has some notes on things common to all the x86 family code.
+
+
+
+ASM FILES
+
+The x86 .asm files are BSD style x86 assembler code, first put through m4
+for macro processing. The generic mpn/asm-defs.m4 is used, together with
+mpn/x86/x86-defs.m4. Detailed notes are in those files.
+
+The code is meant for use with GNU "gas" or a system "as". There's no
+support for assemblers that demand Intel style, and with gas freely
+available and easy to use that shouldn't be a problem.
+
+
+
+STACK FRAME
+
+m4 macros are used to define the parameters passed on the stack, and these
+act like comments on what the stack frame looks like too. For example,
+mpn_mul_1() has the following.
+
+ defframe(PARAM_MULTIPLIER, 16)
+ defframe(PARAM_SIZE, 12)
+ defframe(PARAM_SRC, 8)
+ defframe(PARAM_DST, 4)
+
+Here PARAM_MULTIPLIER gets defined as `FRAME+16(%esp)', and the others
+similarly. The return address is at offset 0, but there's not normally any
+need to access that.
+
+FRAME is redefined as necessary through the code so it's the number of bytes
+pushed on the stack, and hence the offsets in the parameter macros stay
+correct. At the start of a routine FRAME should be zero.
+
+ deflit(`FRAME',0)
+ ...
+ deflit(`FRAME',4)
+ ...
+ deflit(`FRAME',8)
+ ...
+
+Helper macros FRAME_pushl(), FRAME_popl(), FRAME_addl_esp() and
+FRAME_subl_esp() exist to adjust FRAME for the effect of those instructions,
+and can be used instead of explicit definitions if preferred.
+defframe_pushl() is a combination FRAME_pushl() and defframe().
+
+There's generally some slackness in redefining FRAME. If new values aren't
+going to get used, then the redefinitions are omitted to keep from
+cluttering up the code. This happens for instance at the end of a routine,
+where there might be just four register pops and then a ret, so FRAME isn't
+getting used.
+
+Local variables and saved registers can be similarly defined, with negative
+offsets representing stack space below the initial stack pointer. For
+example,
+
+ defframe(SAVE_ESI, -4)
+ defframe(SAVE_EDI, -8)
+ defframe(VAR_COUNTER,-12)
+
+ deflit(STACK_SPACE, 12)
+
+Here STACK_SPACE gets used in a "subl $STACK_SPACE, %esp" to allocate the
+space, and that instruction must be followed by a redefinition of FRAME
+(setting it equal to STACK_SPACE) to reflect the change in %esp.
+
+Definitions for pushed registers are only put in when they're going to be
+used. If registers are just saved and restored with pushes and pops then
+definitions aren't made.
+
+
+
+ASSEMBLER EXPRESSIONS
+
+Only addition and subtraction seem to be universally available, certainly
+that's all the Solaris 8 "as" seems to accept. If expressions are wanted
+then m4 eval() should be used.
+
+In particular note that a "/" anywhere in a line starts a comment in Solaris
+"as", and in some configurations of gas too.
+
+ addl $32/2, %eax <-- wrong
+
+ addl $eval(32/2), %eax <-- right
+
+Binutils gas/config/tc-i386.c has a choice between "/" being a comment
+anywhere in a line, or only at the start. FreeBSD patches 2.9.1 to select
+the latter, and as of 2.9.5 it's the default for GNU/Linux too.
+
+
+
+ASSEMBLER COMMENTS
+
+Solaris "as" doesn't support "#" commenting, using /* */ instead,
+unfortunately. For that reason "C" commenting is used (see asm-defs.m4) and
+the intermediate ".s" files have no comments.
+
+
+
+ZERO DISPLACEMENTS
+
+In a couple of places addressing modes like 0(%ebx) with a byte-sized zero
+displacement are wanted, rather than (%ebx) with no displacement. These are
+either for computed jumps or to get desirable code alignment. Explicit
+.byte sequences are used to ensure the assembler doesn't turn 0(%ebx) into
+(%ebx). The Zdisp() macro in x86-defs.m4 is used for this.
+
+Current gas 2.9.5 or recent 2.9.1 leave 0(%ebx) as written, but old gas
+1.92.3 changes it. In general changing would be the sort of "optimization"
+an assembler might perform, hence explicit ".byte"s are used where
+necessary.
+
+
+
+SHLD/SHRD INSTRUCTIONS
+
+The %cl count forms of double shift instructions like "shldl %cl,%eax,%ebx"
+must be written "shldl %eax,%ebx" for some assemblers. gas takes either,
+Solaris "as" doesn't allow %cl, gcc generates %cl for gas and NeXT (which is
+gas), and omits %cl elsewhere.
+
+For GMP an autoconf test is used to determine whether %cl should be used and
+the macros shldl, shrdl, shldw and shrdw in mpn/x86/x86-defs.m4 then pass
+through or omit %cl as necessary. See comments with those macros for usage.
+
+
+
+DIRECTION FLAG
+
+The x86 calling conventions say that the direction flag should be clear at
+function entry and exit. (See iBCS2 and SVR4 ABI books, references below.)
+
+Although this has been so since the year dot, it's not absolutely clear
+whether it's universally respected. Since it's better to be safe than
+sorry, gmp follows glibc and does a "cld" if it depends on the direction
+flag being clear. This happens only in a few places.
+
+
+
+POSITION INDEPENDENT CODE
+
+Defining the symbol PIC in m4 processing selects position independent code.
+This mainly affects computed jumps, and these are implemented in a
+self-contained fashion (without using the global offset table). The few
+calls from assembly code to global functions use the normal procedure
+linkage table.
+
+PIC is necessary for ELF shared libraries because they can be mapped into
+different processes at different virtual addresses. Text relocations in
+shared libraries are allowed, but that presumably means a page with such a
+relocation isn't shared. The use of the PLT for PIC adds a fixed cost to
+every function call, which is small but might be noticeable when working with
+small operands.
+
+Calls from one library function to another don't need to go through the PLT,
+since of course the call instruction uses a displacement, not an absolute
+address, and the relative locations of object files are known when libgmp.so
+is created. "ld -Bsymbolic" (or "gcc -Wl,-Bsymbolic") will resolve calls
+this way, so that there's no jump through the PLT, but of course leaving
+setups of the GOT address in %ebx that may be unnecessary.
+
+The %ebx setup could be avoided in assembly if a separate option controlled
+PIC for calls as opposed to computed jumps etc. But there's only ever
+likely to be a handful of calls out of assembler, and getting the same
+optimization for C intra-library calls would be more important. There seems
+no easy way to tell gcc that certain functions can be called non-PIC, and
+unfortunately many gmp functions use the global memory allocation variables,
+so they need the GOT anyway. Object files with no global data references
+and only intra-library calls could go into the library as non-PIC under
+-Bsymbolic. Integrating this into libtool and automake is left as an
+exercise for the reader.
+
+
+
+SIMPLE LOOPS
+
+The overheads in setting up for an unrolled loop can mean that at small
+sizes a simple loop is faster. Making small sizes go fast is important,
+even if it adds a cycle or two to bigger sizes. To this end various
+routines choose between a simple loop and an unrolled loop according to
+operand size. The path to the simple loop, or to special case code for
+small sizes, is always as fast as possible.
+
+Adding a simple loop requires a conditional jump to choose between the
+simple and unrolled code. The size of a branch misprediction penalty
+affects whether a simple loop is worthwhile.
+
+The convention is for an m4 definition UNROLL_THRESHOLD to set the crossover
+point, with sizes < UNROLL_THRESHOLD using the simple loop, sizes >=
+UNROLL_THRESHOLD using the unrolled loop. If position independent code adds
+a couple of cycles to an unrolled loop setup, the threshold will vary with
+PIC or non-PIC. Something like the following is typical.
+
+ ifdef(`PIC',`
+ deflit(UNROLL_THRESHOLD, 10)
+ ',`
+ deflit(UNROLL_THRESHOLD, 8)
+ ')
+
+There's no automated way to determine the threshold. Setting it to a small
+value and then to a big value makes it possible to measure the simple and
+unrolled loops each over a range of sizes, from which the crossover point
+can be determined. Alternately, just adjust the threshold up or down until
+there's no more speedups.
+
+
+
+UNROLLED LOOP CODING
+
+The x86 addressing modes allow a byte displacement of -128 to +127, making
+it possible to access 256 bytes, which is 64 limbs, without adjusting
+pointer registers within the loop. Dword sized displacements can be used
+too, but they increase code size, and unrolling to 64 ought to be enough.
+
+When unrolling to the full 64 limbs/loop, the limb at the top of the loop
+will have a displacement of -128, so pointers have to have a corresponding
++128 added before entering the loop. When unrolling to 32 limbs/loop
+displacements 0 to 127 can be used with 0 at the top of the loop and no
+adjustment needed to the pointers.
+
+Where 64 limbs/loop is supported, the +128 adjustment is done only when 64
+limbs/loop is selected. Usually the gain in speed using 64 instead of 32 or
+16 is small, so support for 64 limbs/loop is generally only for comparison.
+
+
+
+COMPUTED JUMPS
+
+When working from least significant limb to most significant limb (most
+routines) the computed jump and pointer calculations in preparation for an
+unrolled loop are as follows.
+
+ S = operand size in limbs
+ N = number of limbs per loop (UNROLL_COUNT)
+ L = log2 of unrolling (UNROLL_LOG2)
+ M = mask for unrolling (UNROLL_MASK)
+ C = code bytes per limb in the loop
+ B = bytes per limb (4 for x86)
+
+ computed jump (-S & M) * C + entrypoint
+ subtract from pointers (-S & M) * B
+ initial loop counter (S-1) >> L
+ displacements 0 to B*(N-1)
+
+The loop counter is decremented at the end of each loop, and the looping
+stops when the decrement takes the counter to -1. The displacements are for
+the addressing accessing each limb, eg. a load with "movl disp(%ebx), %eax".
+
+Usually the multiply by "C" can be handled without an imul, using instead an
+leal, or a shift and subtract.
+
+When working from most significant to least significant limb (eg. mpn_lshift
+and mpn_copyd), the calculations change as follows.
+
+ add to pointers (-S & M) * B
+ displacements 0 to -B*(N-1)
+
+
+
+OLD GAS 1.92.3
+
+This version comes with FreeBSD 2.2.8 and has a couple of gremlins that
+affect gmp code.
+
+Firstly, an expression involving two forward references to labels comes out
+as zero. For example,
+
+ addl $bar-foo, %eax
+ foo:
+ nop
+ bar:
+
+This should lead to "addl $1, %eax", but it comes out as "addl $0, %eax".
+When only one forward reference is involved, it works correctly, as for
+example,
+
+ foo:
+ addl $bar-foo, %eax
+ nop
+ bar:
+
+Secondly, an expression involving two labels can't be used as the
+displacement for an leal. For example,
+
+ foo:
+ nop
+ bar:
+ leal bar-foo(%eax,%ebx,8), %ecx
+
+A slightly cryptic error is given, "Unimplemented segment type 0 in
+parse_operand". When only one label is used it's ok, and the label can be a
+forward reference too, as for example,
+
+ leal foo(%eax,%ebx,8), %ecx
+ nop
+ foo:
+
+These problems only affect PIC computed jump calculations. The workarounds
+are just to do an leal without a displacement and then an addl, and to make
+sure the code is placed so that there's at most one forward reference in the
+addl.
+
+
+
+REFERENCES
+
+"Intel Architecture Software Developer's Manual", volumes 1 to 3, 1999,
+order numbers 243190, 243191 and 243192. Available on-line,
+
+ ftp://download.intel.com/design/PentiumII/manuals/243190.htm
+ ftp://download.intel.com/design/PentiumII/manuals/243191.htm
+ ftp://download.intel.com/design/PentiumII/manuals/243192.htm
+
+"Intel386 Family Binary Compatibility Specification 2", Intel Corporation,
+published by McGraw-Hill, 1991, ISBN 0-07-031219-2.
+
+"System V Application Binary Interface", Unix System Laboratories Inc, 1992,
+published by Prentice Hall, ISBN 0-13-880410-9. And the "Intel386 Processor
+Supplement", AT&T, 1991, ISBN 0-13-877689-X. (These have details of ELF
+shared library PIC coding.)
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/addsub_n.S b/rts/gmp/mpn/x86/addsub_n.S
new file mode 100644
index 0000000000..fe6f648f53
--- /dev/null
+++ b/rts/gmp/mpn/x86/addsub_n.S
@@ -0,0 +1,174 @@
+/* Currently not working and not used. */
+
+/*
+Copyright (C) 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+
+#define SAVE_BORROW_RESTORE_CARRY(r) adcl r,r; shll $31,r
+#define SAVE_CARRY_RESTORE_BORROW(r) adcl r,r
+
+ .globl mpn_addsub_n_0
+ .globl mpn_addsub_n_1
+
+/* Cute i386/i486/p6 addsub loop for the "full overlap" case r1==s2,r2==s1.
+ We let subtraction and addition alternate in being two limbs
+ ahead of the other, thereby avoiding some SAVE_RESTORE. */
+// r1 = r2 + r1 edi = esi + edi
+// r2 = r2 - r1 esi = esi - edi
+// s1 s2
+// r2 r1
+// eax,ebx,ecx,edx,esi,edi,ebp
+mpn_addsub_n_0:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 36(%esp),%ebp /* size */
+
+ shrl $2,%ebp
+ xorl %edx,%edx
+ .align 4
+Loop0: // L=load E=execute S=store
+ movl (%esi),%ebx // sub 0 L
+ movl 4(%esi),%ecx // sub 1 L
+ sbbl (%edi),%ebx // sub 0 LE
+ sbbl 4(%edi),%ecx // sub 1 LE
+// SAVE_BORROW_RESTORE_CARRY(%edx)
+ movl (%esi),%eax // add 0 L
+ adcl %eax,(%edi) // add 0 LES
+ movl 4(%esi),%eax // add 1 L
+ adcl %eax,4(%edi) // add 1 LES
+ movl %ebx,(%esi) // sub 0 S
+ movl %ecx,4(%esi) // sub 1 S
+ movl 8(%esi),%ebx // add 2 L
+ adcl 8(%edi),%ebx // add 2 LE
+ movl 12(%esi),%ecx // add 3 L
+ adcl 12(%edi),%ecx // add 3 LE
+// SAVE_CARRY_RESTORE_BORROW(%edx)
+ movl 8(%edi),%eax // sub 2 L
+ sbbl %eax,8(%esi) // sub 2 LES
+ movl 12(%edi),%eax // sub 3 L
+ sbbl %eax,12(%esi) // sub 3 LES
+ movl %ebx,8(%edi) // add 2 S
+ movl %ecx,12(%edi) // add 3 S
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ebp
+ jnz Loop0
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* Cute i386/i486/p6 addsub loop for the "full overlap" case r1==s1,r2==s2.
+ We let subtraction and addition alternate in being two limbs
+ ahead of the other, thereby avoiding some SAVE_RESTORE. */
+// r1 = r1 + r2 edi = edi + esi
+// r2 = r1 - r2 esi = edi - esi
+// s2 s1
+// r2 r1
+// eax,ebx,ecx,edx,esi,edi,ebp
+mpn_addsub_n_1:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 36(%esp),%ebp /* size */
+
+ shrl $2,%ebp
+ xorl %edx,%edx
+ .align 4
+Loop1: // L=load E=execute S=store
+ movl (%edi),%ebx // sub 0 L
+ sbbl (%esi),%ebx // sub 0 LE
+ movl 4(%edi),%ecx // sub 1 L
+ sbbl 4(%esi),%ecx // sub 1 LE
+// SAVE_BORROW_RESTORE_CARRY(%edx)
+ movl (%esi),%eax // add 0 L
+ adcl %eax,(%edi) // add 0 LES
+ movl 4(%esi),%eax // add 1 L
+ adcl %eax,4(%edi) // add 1 LES
+ movl %ebx,(%esi) // sub 0 S
+ movl %ecx,4(%esi) // sub 1 S
+ movl 8(%esi),%ebx // add 2 L
+ adcl 8(%edi),%ebx // add 2 LE
+ movl 12(%esi),%ecx // add 3 L
+ adcl 12(%edi),%ecx // add 3 LE
+// SAVE_CARRY_RESTORE_BORROW(%edx)
+ movl 8(%edi),%eax // sub 2 L
+ sbbl 8(%esi),%eax // sub 2 LES
+ movl %eax,8(%esi) // sub 2 S
+ movl 12(%edi),%eax // sub 3 L
+ sbbl 12(%esi),%eax // sub 3 LE
+ movl %eax,12(%esi) // sub 3 S
+ movl %ebx,8(%edi) // add 2 S
+ movl %ecx,12(%edi) // add 3 S
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ebp
+ jnz Loop1
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+ .globl mpn_copy
+mpn_copy:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* size */
+
+ shrl $2,%ebp
+ .align 4
+Loop2:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl %eax,(%edi)
+ movl %ebx,4(%edi)
+ movl 8(%esi),%eax
+ movl 12(%esi),%ebx
+ movl %eax,8(%edi)
+ movl %ebx,12(%edi)
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ebp
+ jnz Loop2
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
diff --git a/rts/gmp/mpn/x86/aors_n.asm b/rts/gmp/mpn/x86/aors_n.asm
new file mode 100644
index 0000000000..18ef816b4d
--- /dev/null
+++ b/rts/gmp/mpn/x86/aors_n.asm
@@ -0,0 +1,187 @@
+dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_add_n',`
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(M4_function_nc)
+deflit(`FRAME',0)
+
+ pushl %edi FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC1,%esi
+ movl PARAM_SRC2,%edx
+ movl PARAM_SIZE,%ecx
+
+ movl %ecx,%eax
+ shrl $3,%ecx C compute count for unrolled loop
+ negl %eax
+ andl $7,%eax C get index where to start loop
+ jz LF(M4_function_n,oopgo) C necessary special case for 0
+ incl %ecx C adjust loop count
+ shll $2,%eax C adjustment for pointers...
+ subl %eax,%edi C ... since they are offset ...
+ subl %eax,%esi C ... by a constant when we ...
+ subl %eax,%edx C ... enter the loop
+ shrl $2,%eax C restore previous value
+
+ifdef(`PIC',`
+ C Calculate start address in loop for PIC. Due to limitations in
+ C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
+ call L(0a)
+L(0a): leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $LF(M4_function_n,oop)-L(0a)-3,%eax
+ addl $4,%esp
+',`
+ C Calculate start address in loop for non-PIC.
+ leal LF(M4_function_n,oop)-3(%eax,%eax,8),%eax
+')
+
+ C These lines initialize carry from the 5th parameter. Should be
+ C possible to simplify.
+ pushl %ebp FRAME_pushl()
+ movl PARAM_CARRY,%ebp
+ shrl $1,%ebp C shift bit 0 into carry
+ popl %ebp FRAME_popl()
+
+ jmp *%eax C jump into loop
+
+EPILOGUE()
+
+
+ ALIGN(8)
+PROLOGUE(M4_function_n)
+deflit(`FRAME',0)
+
+ pushl %edi FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC1,%esi
+ movl PARAM_SRC2,%edx
+ movl PARAM_SIZE,%ecx
+
+ movl %ecx,%eax
+ shrl $3,%ecx C compute count for unrolled loop
+ negl %eax
+ andl $7,%eax C get index where to start loop
+ jz L(oop) C necessary special case for 0
+ incl %ecx C adjust loop count
+ shll $2,%eax C adjustment for pointers...
+ subl %eax,%edi C ... since they are offset ...
+ subl %eax,%esi C ... by a constant when we ...
+ subl %eax,%edx C ... enter the loop
+ shrl $2,%eax C restore previous value
+
+ifdef(`PIC',`
+ C Calculate start address in loop for PIC. Due to limitations in
+ C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
+ call L(0b)
+L(0b): leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $L(oop)-L(0b)-3,%eax
+ addl $4,%esp
+',`
+ C Calculate start address in loop for non-PIC.
+ leal L(oop)-3(%eax,%eax,8),%eax
+')
+ jmp *%eax C jump into loop
+
+L(oopgo):
+ pushl %ebp FRAME_pushl()
+ movl PARAM_CARRY,%ebp
+ shrl $1,%ebp C shift bit 0 into carry
+ popl %ebp FRAME_popl()
+
+ ALIGN(8)
+L(oop): movl (%esi),%eax
+ M4_inst (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ M4_inst 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ M4_inst 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ M4_inst 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ M4_inst 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ M4_inst 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ M4_inst 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ M4_inst 28(%edx),%eax
+ movl %eax,28(%edi)
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz L(oop)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/aorsmul_1.asm b/rts/gmp/mpn/x86/aorsmul_1.asm
new file mode 100644
index 0000000000..f32ad83989
--- /dev/null
+++ b/rts/gmp/mpn/x86/aorsmul_1.asm
@@ -0,0 +1,134 @@
+dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
+dnl limb and add the result to a second limb vector.
+
+
+dnl Copyright (C) 1992, 1994, 1997, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_addmul_1',`
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+
+',`ifdef(`OPERATION_submul_1',`
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+
+define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
+define(PARAM_SIZE, `FRAME+12(%esp)')
+define(PARAM_SRC, `FRAME+8(%esp)')
+define(PARAM_DST, `FRAME+4(%esp)')
+
+ TEXT
+ ALIGN(8)
+
+PROLOGUE(M4_function_1)
+deflit(`FRAME',0)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ecx
+
+ xorl %ebx,%ebx
+ andl $3,%ecx
+ jz L(end0)
+
+L(oop0):
+ movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ leal 4(%esi),%esi
+ addl %ebx,%eax
+ movl $0,%ebx
+ adcl %ebx,%edx
+ M4_inst %eax,(%edi)
+ adcl %edx,%ebx C propagate carry into cylimb
+
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oop0)
+
+L(end0):
+ movl PARAM_SIZE,%ecx
+ shrl $2,%ecx
+ jz L(end)
+
+ ALIGN(8)
+L(oop): movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ addl %eax,%ebx
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 4(%esi),%eax
+ mull PARAM_MULTIPLIER
+ M4_inst %ebx,(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl 8(%esi),%eax
+ mull PARAM_MULTIPLIER
+ M4_inst %ebp,4(%edi)
+ adcl %eax,%ebx C new lo + cylimb
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 12(%esi),%eax
+ mull PARAM_MULTIPLIER
+ M4_inst %ebx,8(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ M4_inst %ebp,12(%edi)
+ adcl $0,%ebx C propagate carry into cylimb
+
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ecx
+ jnz L(oop)
+
+L(end): movl %ebx,%eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/copyd.asm b/rts/gmp/mpn/x86/copyd.asm
new file mode 100644
index 0000000000..439640e836
--- /dev/null
+++ b/rts/gmp/mpn/x86/copyd.asm
@@ -0,0 +1,80 @@
+dnl x86 mpn_copyd -- copy limb vector, decrementing.
+dnl
+dnl Future: On P6 an MMX loop should be able to go faster than this code.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from high to low addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+C
+C P5 - 1.0 cycles/limb.
+C
+C P6 - 2.4 cycles/limb, approx 40 cycles startup.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_copyd)
+ C eax saved esi
+ C ebx
+ C ecx counter
+ C edx saved edi
+ C esi src
+ C edi dst
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ movl PARAM_DST, %edi
+ leal -4(%esi,%ecx,4), %esi
+
+ leal -4(%edi,%ecx,4), %edi
+
+ std
+
+ rep
+ movsl
+
+ cld
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/copyi.asm b/rts/gmp/mpn/x86/copyi.asm
new file mode 100644
index 0000000000..5bc4e36689
--- /dev/null
+++ b/rts/gmp/mpn/x86/copyi.asm
@@ -0,0 +1,79 @@
+dnl x86 mpn_copyi -- copy limb vector, incrementing.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from low to high addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+C
+C P5 - 1.0 cycles/limb.
+C
+C P6 - 0.75 cycles/limb. An MMX based copy was tried, but was found to be
+C slower than a rep movs in all cases. The fastest MMX found was 0.8
+C cycles/limb (when fully aligned). A rep movs seems to have a startup
+C time of about 15 cycles, but doing something special for small sizes
+C could lead to a branch misprediction that would destroy any saving.
+C For now a plain rep movs seems ok for P6.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+ C eax saved esi
+ C ebx
+ C ecx counter
+ C edx saved edi
+ C esi src
+ C edi dst
+ C ebp
+
+PROLOGUE(mpn_copyi)
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ movl PARAM_DST, %edi
+
+ cld C better safe than sorry, see mpn/x86/README.family
+
+ rep
+ movsl
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/diveby3.asm b/rts/gmp/mpn/x86/diveby3.asm
new file mode 100644
index 0000000000..df879da9e1
--- /dev/null
+++ b/rts/gmp/mpn/x86/diveby3.asm
@@ -0,0 +1,115 @@
+dnl x86 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl The following all have their own optimized versions of this routine,
+dnl but for reference the code here runs as follows.
+dnl
+dnl cycles/limb
+dnl P54 18.0
+dnl P55 17.0
+dnl P6 14.5
+dnl K6 14.0
+dnl K7 10.0
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+dnl ceil(b/3) and ceil(b*2/3) where b=2^32
+deflit(ONE_THIRD_CEIL, 0x55555556)
+deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SRC, %ecx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_SIZE, %ebp
+ pushl %edi FRAME_pushl()
+
+ movl PARAM_DST, %edi
+ pushl %esi FRAME_pushl()
+
+ movl $INVERSE_3, %esi
+ pushl %ebx FRAME_pushl()
+
+ leal (%ecx,%ebp,4), %ecx
+ movl PARAM_CARRY, %ebx
+
+ leal (%edi,%ebp,4), %edi
+ negl %ebp
+
+
+ ALIGN(8)
+L(top):
+ C eax scratch, low product
+ C ebx carry limb (0 to 3)
+ C ecx &src[size]
+ C edx scratch, high product
+ C esi multiplier
+ C edi &dst[size]
+ C ebp counter, limbs, negative
+
+ movl (%ecx,%ebp,4), %eax
+
+ subl %ebx, %eax
+
+ setc %bl
+
+ imull %esi
+
+ cmpl $ONE_THIRD_CEIL, %eax
+ movl %eax, (%edi,%ebp,4)
+
+ sbbl $-1, %ebx C +1 if eax>=ceil(b/3)
+ cmpl $TWO_THIRDS_CEIL, %eax
+
+ sbbl $-1, %ebx C +1 if eax>=ceil(b*2/3)
+ incl %ebp
+
+ jnz L(top)
+
+
+ movl %ebx, %eax
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/divrem_1.asm b/rts/gmp/mpn/x86/divrem_1.asm
new file mode 100644
index 0000000000..12f14676d6
--- /dev/null
+++ b/rts/gmp/mpn/x86/divrem_1.asm
@@ -0,0 +1,232 @@
+dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl cycles/limb
+dnl K6 20
+dnl P5 44
+dnl P6 39
+dnl 486 approx 43 maybe
+dnl
+dnl
+dnl The following have their own optimized divrem_1 implementations, but
+dnl for reference the code here runs as follows.
+dnl
+dnl cycles/limb
+dnl P6MMX 39
+dnl K7 42
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C
+C Divide src,size by divisor and store the quotient in dst+xsize,size.
+C Extend the division to fractional quotient limbs in dst,xsize. Return the
+C remainder. Either or both xsize and size can be 0.
+C
+C mpn_divrem_1c takes a carry parameter which is an initial high limb,
+C effectively one extra limb at the top of src,size. Must have
+C carry<divisor.
+C
+C
+C Essentially the code is the same as the division based part of
+C mpn/generic/divrem_1.c, but has the following advantages.
+C
+C - If gcc isn't being used then divrem_1.c will get the generic C
+C udiv_qrnnd() and be rather slow.
+C
+C - On K6, using the loop instruction is a 10% speedup, but gcc doesn't
+C generate that instruction (as of gcc 2.95.2 at least).
+C
+C A test is done to see if the high limb is less the the divisor, and if so
+C one less div is done. A div is between 20 and 40 cycles on the various
+C x86s, so assuming high<divisor about half the time, then this test saves
+C half that amount. The branch misprediction penalty on each chip is less
+C than half a div.
+C
+C
+C K6: Back-to-back div instructions run at 20 cycles, the same as the loop
+C here, so it seems there's nothing to gain by rearranging the loop.
+C Pairing the mov and loop instructions was found to gain nothing. (The
+C same is true of the mpn/x86/mod_1.asm loop.)
+C
+C With a "decl/jnz" rather than a "loop" this code runs at 22 cycles.
+C The loop_or_decljnz macro is an easy way to get a 10% speedup.
+C
+C The fast K6 multiply might be thought to suit a multiply-by-inverse,
+C but that algorithm has been found to suffer from the releatively poor
+C carry handling on K6 and too many auxiliary instructions. The
+C fractional part however could be done at about 13 c/l.
+C
+C P5: Moving the load down to pair with the store might save 1 cycle, but
+C that doesn't seem worth bothering with, since it'd be only a 2.2%
+C saving.
+C
+C Again here the auxiliary instructions hinder a multiply-by-inverse,
+C though there might be a 10-15% speedup available
+
+
+defframe(PARAM_CARRY, 24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC, 12)
+defframe(PARAM_XSIZE, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %edi FRAME_pushl()
+
+ movl PARAM_SRC, %edi
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_DST, %ebx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_XSIZE, %ebp
+ orl %ecx, %ecx
+
+ movl PARAM_CARRY, %edx
+ jz LF(mpn_divrem_1,fraction)
+
+ leal -4(%ebx,%ebp,4), %ebx C dst one limb below integer part
+ jmp LF(mpn_divrem_1,integer_top)
+
+EPILOGUE()
+
+
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %edi FRAME_pushl()
+
+ movl PARAM_SRC, %edi
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ orl %ecx,%ecx
+
+ jz L(size_zero)
+ pushl %ebx FRAME_pushl()
+
+ movl -4(%edi,%ecx,4), %eax C src high limb
+ xorl %edx, %edx
+
+ movl PARAM_DST, %ebx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_XSIZE, %ebp
+ cmpl %esi, %eax
+
+ leal -4(%ebx,%ebp,4), %ebx C dst one limb below integer part
+ jae L(integer_entry)
+
+
+ C high<divisor, so high of dst is zero, and avoid one div
+
+ movl %edx, (%ebx,%ecx,4)
+ decl %ecx
+
+ movl %eax, %edx
+ jz L(fraction)
+
+
+L(integer_top):
+ C eax scratch (quotient)
+ C ebx dst+4*xsize-4
+ C ecx counter
+ C edx scratch (remainder)
+ C esi divisor
+ C edi src
+ C ebp xsize
+
+ movl -4(%edi,%ecx,4), %eax
+L(integer_entry):
+
+ divl %esi
+
+ movl %eax, (%ebx,%ecx,4)
+ loop_or_decljnz L(integer_top)
+
+
+L(fraction):
+ orl %ebp, %ecx
+ jz L(done)
+
+ movl PARAM_DST, %ebx
+
+
+L(fraction_top):
+ C eax scratch (quotient)
+ C ebx dst
+ C ecx counter
+ C edx scratch (remainder)
+ C esi divisor
+ C edi
+ C ebp
+
+ xorl %eax, %eax
+
+ divl %esi
+
+ movl %eax, -4(%ebx,%ecx,4)
+ loop_or_decljnz L(fraction_top)
+
+
+L(done):
+ popl %ebp
+ movl %edx, %eax
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
+L(size_zero):
+deflit(`FRAME',8)
+ movl PARAM_XSIZE, %ecx
+ xorl %eax, %eax
+
+ movl PARAM_DST, %edi
+
+ cld C better safe than sorry, see mpn/x86/README.family
+
+ rep
+ stosl
+
+ popl %esi
+ popl %edi
+ ret
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/README b/rts/gmp/mpn/x86/k6/README
new file mode 100644
index 0000000000..3ad96c8b89
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/README
@@ -0,0 +1,237 @@
+
+ AMD K6 MPN SUBROUTINES
+
+
+
+This directory contains code optimized for AMD K6 CPUs, meaning K6, K6-2 and
+K6-3.
+
+The mmx and k62mmx subdirectories have routines using MMX instructions. All
+K6s have MMX, the separate directories are just so that ./configure can omit
+them if the assembler doesn't support MMX.
+
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache, are as follows.
+
+ cycles/limb
+
+ mpn_add_n/sub_n 3.25 normal, 2.75 in-place
+
+ mpn_mul_1 6.25
+ mpn_add/submul_1 7.65-8.4 (varying with data values)
+
+ mpn_mul_basecase 9.25 cycles/crossproduct (approx)
+ mpn_sqr_basecase 4.7 cycles/crossproduct (approx)
+ or 9.2 cycles/triangleproduct (approx)
+
+ mpn_divrem_1 20.0
+ mpn_mod_1 20.0
+ mpn_divexact_by3 11.0
+
+ mpn_l/rshift 3.0
+
+ mpn_copyi/copyd 1.0
+
+ mpn_com_n 1.5-1.85 \
+ mpn_and/andn/ior/xor_n 1.5-1.75 | varying with
+ mpn_iorn/xnor_n 2.0-2.25 | data alignment
+ mpn_nand/nior_n 2.0-2.25 /
+
+ mpn_popcount 12.5
+ mpn_hamdist 13.0
+
+
+K6-2 and K6-3 have dual-issue MMX and get the following improvements.
+
+ mpn_l/rshift 1.75
+
+ mpn_copyi/copyd 0.56 or 1.0 \
+ |
+ mpn_com_n 1.0-1.2 | varying with
+ mpn_and/andn/ior/xor_n 1.2-1.5 | data alignment
+ mpn_iorn/xnor_n 1.5-2.0 |
+ mpn_nand/nior_n 1.75-2.0 /
+
+ mpn_popcount 9.0
+ mpn_hamdist 11.5
+
+
+Prefetching of sources hasn't yet given any joy. With the 3DNow "prefetch"
+instruction, code seems to run slower, and with just "mov" loads it doesn't
+seem faster. Results so far are inconsistent. The K6 does a hardware
+prefetch of the second cache line in a sector, so the penalty for not
+prefetching in software is reduced.
+
+
+
+
+NOTES
+
+All K6 family chips have MMX, but only K6-2 and K6-3 have 3DNow.
+
+Plain K6 executes MMX instructions only in the X pipe, but K6-2 and K6-3 can
+execute them in both X and Y (and together).
+
+Branch misprediction penalty is 1 to 4 cycles (Optimization Manual
+chapter 6 table 12).
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+Store queue is 7 entries of 64 bits each.
+
+Floating point multiplications can be done in parallel with integer
+multiplications, but there doesn't seem to be any way to make use of this.
+
+
+
+OPTIMIZATIONS
+
+Unrolled loops are used to reduce looping overhead. The unrolling is
+configurable up to 32 limbs/loop for most routines, up to 64 for some.
+
+Sometimes computed jumps into the unrolling are used to handle sizes not a
+multiple of the unrolling. An attractive feature of this is that times
+smoothly increase with operand size, but an indirect jump is about 6 cycles
+and the setups about another 6, so it depends on how much the unrolled code
+is faster than a simple loop as to whether a computed jump ought to be used.
+
+Position independent code is implemented using a call to get eip for
+computed jumps and a ret is always done, rather than an addl $4,%esp or a
+popl, so the CPU return address branch prediction stack stays synchronised
+with the actual stack in memory. Such a call however still costs 4 to 7
+cycles.
+
+Branch prediction, in absence of any history, will guess forward jumps are
+not taken and backward jumps are taken. Where possible it's arranged that
+the less likely or less important case is under a taken forward jump.
+
+
+
+MMX
+
+Putting emms or femms as late as possible in a routine seems to be fastest.
+Perhaps an emms or femms stalls until all outstanding MMX instructions have
+completed, so putting it later gives them a chance to complete on their own,
+in parallel with other operations (like register popping).
+
+The Optimization Manual chapter 5 recommends using a femms on K6-2 and K6-3
+at the start of a routine, in case it's been preceded by x87 floating point
+operations. This isn't done because in gmp programs it's expected that x87
+floating point won't be much used and that chances are an mpn routine won't
+have been preceded by any x87 code.
+
+
+
+CODING
+
+Instructions in general code are shown paired if they can decode and execute
+together, meaning two short decode instructions with the second not
+depending on the first, only the first using the shifter, no more than one
+load, and no more than one store.
+
+K6 does some out of order execution so the pairings aren't essential, they
+just show what slots might be available. When decoding is the limiting
+factor things can be scheduled that might not execute until later.
+
+
+
+NOTES
+
+Code alignment
+
+- if an opcode/modrm or 0Fh/opcode/modrm crosses a cache line boundary,
+ short decode is inhibited. The cross.pl script detects this.
+
+- loops and branch targets should be aligned to 16 bytes, or ensure at least
+ 2 instructions before a 32 byte boundary. This makes use of the 16 byte
+ cache in the BTB.
+
+Addressing modes
+
+- (%esi) degrades decoding from short to vector. 0(%esi) doesn't have this
+ problem, and can be used as an equivalent, or easier is just to use a
+ different register, like %ebx.
+
+- K6 and pre-CXT core K6-2 have the following problem. (K6-2 CXT and K6-3
+ have it fixed, these being cpuid function 1 signatures 0x588 to 0x58F).
+
+ If more than 3 bytes are needed to determine instruction length then
+ decoding degrades from direct to long, or from long to vector. This
+ happens with forms like "0F opcode mod/rm" with mod/rm=00-xxx-100 since
+ with mod=00 the sib determines whether there's a displacement.
+
+ This affects all MMX and 3DNow instructions, and others with an 0F prefix
+ like movzbl. The modes affected are anything with an index and no
+ displacement, or an index but no base, and this includes (%esp) which is
+ really (,%esp,1).
+
+ The cross.pl script detects problem cases. The workaround is to always
+ use a displacement, and to do this with Zdisp if it's zero so the
+ assembler doesn't discard it.
+
+ See Optimization Manual rev D page 67 and 3DNow Porting Guide rev B pages
+ 13-14 and 36-37.
+
+Calls
+
+- indirect jumps and calls are not branch predicted, they measure about 6
+ cycles.
+
+Various
+
+- adcl 2 cycles of decode, maybe 2 cycles executing in the X pipe
+- bsf 12-27 cycles
+- emms 5 cycles
+- femms 3 cycles
+- jecxz 2 cycles taken, 13 not taken (optimization manual says 7 not taken)
+- divl 20 cycles back-to-back
+- imull 2 decode, 2 execute
+- mull 2 decode, 3 execute (optimization manual decoding sample)
+- prefetch 2 cycles
+- rcll/rcrl implicit by one bit: 2 cycles
+ immediate or %cl count: 11 + 2 per bit for dword
+ 13 + 4 per bit for byte
+- setCC 2 cycles
+- xchgl %eax,reg 1.5 cycles, back-to-back (strange)
+ reg,reg 2 cycles, back-to-back
+
+
+
+
+REFERENCES
+
+"AMD-K6 Processor Code Optimization Application Note", AMD publication
+number 21924, revision D amendment 0, January 2000. This describes K6-2 and
+K6-3. Available on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21924.pdf
+
+"AMD-K6 MMX Enhanced Processor x86 Code Optimization Application Note", AMD
+publication number 21828, revision A amendment 0, August 1997. This is an
+older edition of the above document, describing plain K6. Available
+on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21828.pdf
+
+"3DNow Technology Manual", AMD publication number 21928F/0-August 1999.
+This describes the femms and prefetch instructions, but nothing else from
+3DNow has been used. Available on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21928.pdf
+
+"3DNow Instruction Porting Guide", AMD publication number 22621, revision B,
+August 1999. This has some notes on general K6 optimizations as well as
+3DNow. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22621.pdf
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/k6/aors_n.asm b/rts/gmp/mpn/x86/k6/aors_n.asm
new file mode 100644
index 0000000000..31b05ada51
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/aors_n.asm
@@ -0,0 +1,329 @@
+dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
+dnl
+dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_add_n', `
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+ define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+ define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size. The return value is the carry bit from the top of the result
+C (1 or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation. Note values other than 1 or 0 here will lead to garbage
+C results.
+C
+C Instruction decoding limits a normal dst=src1+src2 operation to 3 c/l, and
+C an in-place dst+=src to 2.5 c/l. The unrolled loops have 1 cycle/loop of
+C loop control, which with 4 limbs/loop means an extra 0.25 c/l.
+
+define(PARAM_CARRY, `FRAME+20(%esp)')
+define(PARAM_SIZE, `FRAME+16(%esp)')
+define(PARAM_SRC2, `FRAME+12(%esp)')
+define(PARAM_SRC1, `FRAME+8(%esp)')
+define(PARAM_DST, `FRAME+4(%esp)')
+deflit(`FRAME',0)
+
+dnl minimum 5 because the unrolled code can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function_nc)
+ movl PARAM_CARRY, %eax
+ jmp LF(M4_function_n,start)
+EPILOGUE()
+
+
+PROLOGUE(M4_function_n)
+ xorl %eax, %eax
+L(start):
+ movl PARAM_SIZE, %ecx
+ pushl %ebx
+FRAME_pushl()
+
+ movl PARAM_SRC1, %ebx
+ pushl %edi
+FRAME_pushl()
+
+ movl PARAM_SRC2, %edx
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_DST, %edi
+ jae L(unroll)
+
+
+ shrl %eax C initial carry flag
+
+ C offset 0x21 here, close enough to aligned
+L(simple):
+ C eax scratch
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+ C
+ C The store to (%edi) could be done with a stosl; it'd be smaller
+ C code, but there's no speed gain and a cld would have to be added
+ C (per mpn/x86/README.family).
+
+ movl (%ebx), %eax
+ leal 4(%ebx), %ebx
+
+ M4_inst (%edx), %eax
+
+ movl %eax, (%edi)
+ leal 4(%edi), %edi
+
+ leal 4(%edx), %edx
+ loop L(simple)
+
+
+ movl $0, %eax
+ popl %edi
+
+ setc %al
+
+ popl %ebx
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(unroll):
+ C eax carry
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+
+ cmpl %edi, %ebx
+ pushl %esi
+
+ je L(inplace)
+
+ifdef(`OPERATION_add_n',`
+ cmpl %edi, %edx
+
+ je L(inplace_reverse)
+')
+
+ movl %ecx, %esi
+
+ andl $-4, %ecx
+ andl $3, %esi
+
+ leal (%ebx,%ecx,4), %ebx
+ leal (%edx,%ecx,4), %edx
+ leal (%edi,%ecx,4), %edi
+
+ negl %ecx
+ shrl %eax
+
+ ALIGN(32)
+L(normal_top):
+ C eax counter, qwords, negative
+ C ebx src1
+ C ecx scratch
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+
+ movl (%ebx,%ecx,4), %eax
+ leal 5(%ecx), %ecx
+ M4_inst -20(%edx,%ecx,4), %eax
+ movl %eax, -20(%edi,%ecx,4)
+
+ movl 4-20(%ebx,%ecx,4), %eax
+ M4_inst 4-20(%edx,%ecx,4), %eax
+ movl %eax, 4-20(%edi,%ecx,4)
+
+ movl 8-20(%ebx,%ecx,4), %eax
+ M4_inst 8-20(%edx,%ecx,4), %eax
+ movl %eax, 8-20(%edi,%ecx,4)
+
+ movl 12-20(%ebx,%ecx,4), %eax
+ M4_inst 12-20(%edx,%ecx,4), %eax
+ movl %eax, 12-20(%edi,%ecx,4)
+
+ loop L(normal_top)
+
+
+ decl %esi
+ jz L(normal_finish_one)
+ js L(normal_done)
+
+ C two or three more limbs
+
+ movl (%ebx), %eax
+ M4_inst (%edx), %eax
+ movl %eax, (%edi)
+
+ movl 4(%ebx), %eax
+ M4_inst 4(%edx), %eax
+ decl %esi
+ movl %eax, 4(%edi)
+
+ jz L(normal_done)
+ movl $2, %ecx
+
+L(normal_finish_one):
+ movl (%ebx,%ecx,4), %eax
+ M4_inst (%edx,%ecx,4), %eax
+ movl %eax, (%edi,%ecx,4)
+
+L(normal_done):
+ popl %esi
+ popl %edi
+
+ movl $0, %eax
+ popl %ebx
+
+ setc %al
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+
+ifdef(`OPERATION_add_n',`
+L(inplace_reverse):
+ C dst==src2
+
+ movl %ebx, %edx
+')
+
+L(inplace):
+ C eax initial carry
+ C ebx
+ C ecx size
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ leal -1(%ecx), %esi
+ decl %ecx
+
+ andl $-4, %ecx
+ andl $3, %esi
+
+ movl (%edx), %ebx C src low limb
+ leal (%edx,%ecx,4), %edx
+
+ leal (%edi,%ecx,4), %edi
+ negl %ecx
+
+ shrl %eax
+
+
+ ALIGN(32)
+L(inplace_top):
+ C eax
+ C ebx next src limb
+ C ecx size
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ M4_inst %ebx, (%edi,%ecx,4)
+
+ movl 4(%edx,%ecx,4), %eax
+ leal 5(%ecx), %ecx
+
+ M4_inst %eax, 4-20(%edi,%ecx,4)
+
+ movl 8-20(%edx,%ecx,4), %eax
+ movl 12-20(%edx,%ecx,4), %ebx
+
+ M4_inst %eax, 8-20(%edi,%ecx,4)
+ M4_inst %ebx, 12-20(%edi,%ecx,4)
+
+ movl 16-20(%edx,%ecx,4), %ebx
+ loop L(inplace_top)
+
+
+ C now %esi is 0 to 3 representing respectively 1 to 4 limbs more
+
+ M4_inst %ebx, (%edi)
+
+ decl %esi
+ jz L(inplace_finish_one)
+ js L(inplace_done)
+
+ C two or three more limbs
+
+ movl 4(%edx), %eax
+ movl 8(%edx), %ebx
+ M4_inst %eax, 4(%edi)
+ M4_inst %ebx, 8(%edi)
+
+ decl %esi
+ movl $2, %ecx
+
+ jz L(normal_done)
+
+L(inplace_finish_one):
+ movl 4(%edx,%ecx,4), %eax
+ M4_inst %eax, 4(%edi,%ecx,4)
+
+L(inplace_done):
+ popl %esi
+ popl %edi
+
+ movl $0, %eax
+ popl %ebx
+
+ setc %al
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/aorsmul_1.asm b/rts/gmp/mpn/x86/k6/aorsmul_1.asm
new file mode 100644
index 0000000000..da4120fe2f
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/aorsmul_1.asm
@@ -0,0 +1,372 @@
+dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl
+dnl K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data),
+dnl PIC adds about 6 cycles at the start.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6: large multpliers small multpliers
+dnl UNROLL_COUNT cycles/limb cycles/limb
+dnl 4 9.5 7.78
+dnl 8 9.0 7.78
+dnl 16 8.4 7.65
+dnl 32 8.4 8.2
+dnl
+dnl Maximum possible unrolling with the current code is 32.
+dnl
+dnl Unrolling to 16 limbs/loop makes the unrolled loop fit exactly in a 256
+dnl byte block, which might explain the good speed at that unrolling.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1', `
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+ define(M4_function_1c, mpn_addmul_1c)
+ define(M4_description, add it to)
+ define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1', `
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+ define(M4_function_1c, mpn_submul_1c)
+ define(M4_description, subtract it from)
+ define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+C
+C The jadcl0()s in the unrolled loop makes the speed data dependent. Small
+C multipliers (most significant few bits clear) result in few carry bits and
+C speeds up to 7.65 cycles/limb are attained. Large multipliers (most
+C significant few bits set) make the carry bits 50/50 and lead to something
+C more like 8.4 c/l. (With adcl's both of these would be 9.3 c/l.)
+C
+C It's important that the gains for jadcl0 on small multipliers don't come
+C at the cost of slowing down other data. Tests on uniformly distributed
+C random data, designed to confound branch prediction, show about a 7%
+C speed-up using jadcl0 over adcl (8.93 versus 9.57 cycles/limb, with all
+C overheads included).
+C
+C In the simple loop, jadcl0() measures slower than adcl (11.9-14.7 versus
+C 11.0 cycles/limb), and hence isn't used.
+C
+C In the simple loop, note that running ecx from negative to zero and using
+C it as an index in the two movs wouldn't help. It would save one
+C instruction (2*addl+loop becoming incl+jnz), but there's nothing unpaired
+C that would be collapsed by this.
+C
+C
+C jadcl0
+C ------
+C
+C jadcl0() being faster than adcl $0 seems to be an artifact of two things,
+C firstly the instruction decoding and secondly the fact that there's a
+C carry bit for the jadcl0 only on average about 1/4 of the time.
+C
+C The code in the unrolled loop decodes something like the following.
+C
+C decode cycles
+C mull %ebp 2
+C M4_inst %esi, disp(%edi) 1
+C adcl %eax, %ecx 2
+C movl %edx, %esi \ 1
+C jnc 1f /
+C incl %esi \ 1
+C 1: movl disp(%ebx), %eax /
+C ---
+C 7
+C
+C In a back-to-back style test this measures 7 with the jnc not taken, or 8
+C with it taken (both when correctly predicted). This is opposite to the
+C measurements showing small multipliers running faster than large ones.
+C Watch this space for more info ...
+C
+C It's not clear how much branch misprediction might be costing. The K6
+C doco says it will be 1 to 4 cycles, but presumably it's near the low end
+C of that range to get the measured results.
+C
+C
+C In the code the two carries are more or less the preceding mul product and
+C the calculation is roughly
+C
+C x*y + u*b+v
+C
+C where b=2^32 is the size of a limb, x*y is the two carry limbs, and u and
+C v are the two limbs it's added to (being the low of the next mul, and a
+C limb from the destination).
+C
+C To get a carry requires x*y+u*b+v >= b^2, which is u*b+v >= b^2-x*y, and
+C there are b^2-(b^2-x*y) = x*y many such values, giving a probability of
+C x*y/b^2. If x, y, u and v are random and uniformly distributed between 0
+C and b-1, then the total probability can be summed over x and y,
+C
+C 1 b-1 b-1 x*y 1 b*(b-1) b*(b-1)
+C --- * sum sum --- = --- * ------- * ------- = 1/4
+C b^2 x=0 y=1 b^2 b^4 2 2
+C
+C Actually it's a very tiny bit less than 1/4 of course. If y is fixed,
+C then the probability is 1/2*y/b thus varying linearly between 0 and 1/2.
+
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 9)
+',`
+deflit(UNROLL_THRESHOLD, 6)
+')
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function_1c)
+ pushl %esi
+deflit(`FRAME',4)
+ movl PARAM_CARRY, %esi
+ jmp LF(M4_function_1,start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_function_1)
+ push %esi
+deflit(`FRAME',4)
+ xorl %esi, %esi C initial carry
+
+L(start_nc):
+ movl PARAM_SIZE, %ecx
+ pushl %ebx
+deflit(`FRAME',8)
+
+ movl PARAM_SRC, %ebx
+ pushl %edi
+deflit(`FRAME',12)
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_DST, %edi
+
+ pushl %ebp
+deflit(`FRAME',16)
+ jae L(unroll)
+
+
+ C simple loop
+
+ movl PARAM_MULTIPLIER, %ebp
+
+L(simple):
+ C eax scratch
+ C ebx src
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi dst
+ C ebp multiplier
+
+ movl (%ebx), %eax
+ addl $4, %ebx
+
+ mull %ebp
+
+ addl $4, %edi
+ addl %esi, %eax
+
+ adcl $0, %edx
+
+ M4_inst %eax, -4(%edi)
+
+ adcl $0, %edx
+
+ movl %edx, %esi
+ loop L(simple)
+
+
+ popl %ebp
+ popl %edi
+
+ popl %ebx
+ movl %esi, %eax
+
+ popl %esi
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+C The unrolled loop uses a "two carry limbs" scheme. At the top of the loop
+C the carries are ecx=lo, esi=hi, then they swap for each limb processed.
+C For the computed jump an odd size means they start one way around, an even
+C size the other.
+C
+C VAR_JUMP holds the computed jump temporarily because there's not enough
+C registers at the point of doing the mul for the initial two carry limbs.
+C
+C The add/adc for the initial carry in %esi is necessary only for the
+C mpn_addmul/submul_1c entry points. Duplicating the startup code to
+C eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good
+C idea.
+
+dnl overlapping with parameters already fetched
+define(VAR_COUNTER, `PARAM_SIZE')
+define(VAR_JUMP, `PARAM_DST')
+
+L(unroll):
+ C eax
+ C ebx src
+ C ecx size
+ C edx
+ C esi initial carry
+ C edi dst
+ C ebp
+
+ movl %ecx, %edx
+ decl %ecx
+
+ subl $2, %edx
+ negl %ecx
+
+ shrl $UNROLL_LOG2, %edx
+ andl $UNROLL_MASK, %ecx
+
+ movl %edx, VAR_COUNTER
+ movl %ecx, %edx
+
+ shll $4, %edx
+ negl %ecx
+
+ C 15 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%edx,%ecx,1), %edx
+')
+ movl (%ebx), %eax C src low limb
+
+ movl PARAM_MULTIPLIER, %ebp
+ movl %edx, VAR_JUMP
+
+ mull %ebp
+
+ addl %esi, %eax C initial carry (from _1c)
+ jadcl0( %edx)
+
+
+ leal 4(%ebx,%ecx,4), %ebx
+ movl %edx, %esi C high carry
+
+ movl VAR_JUMP, %edx
+ leal (%edi,%ecx,4), %edi
+
+ testl $1, %ecx
+ movl %eax, %ecx C low carry
+
+ jz L(noswap)
+ movl %esi, %ecx C high,low carry other way around
+
+ movl %eax, %esi
+L(noswap):
+
+ jmp *%edx
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%edx,%ecx,1), %edx
+ addl $L(entry)-L(here), %edx
+ addl (%esp), %edx
+ ret
+')
+
+
+C -----------------------------------------------------------
+ ALIGN(32)
+L(top):
+deflit(`FRAME',16)
+ C eax scratch
+ C ebx src
+ C ecx carry lo
+ C edx scratch
+ C esi carry hi
+ C edi dst
+ C ebp multiplier
+ C
+ C 15 code bytes per limb
+
+ leal UNROLL_BYTES(%edi), %edi
+
+L(entry):
+forloop(`i', 0, UNROLL_COUNT/2-1, `
+ deflit(`disp0', eval(2*i*4))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%ebx), %eax)
+ mull %ebp
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+ adcl %eax, %esi
+ movl %edx, %ecx
+ jadcl0( %ecx)
+
+ movl disp1(%ebx), %eax
+ mull %ebp
+ M4_inst %esi, disp1(%edi)
+ adcl %eax, %ecx
+ movl %edx, %esi
+ jadcl0( %esi)
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%ebx), %ebx
+
+ jns L(top)
+
+
+ popl %ebp
+ M4_inst %ecx, UNROLL_BYTES(%edi)
+
+ popl %edi
+ movl %esi, %eax
+
+ popl %ebx
+ jadcl0( %eax)
+
+ popl %esi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/cross.pl b/rts/gmp/mpn/x86/k6/cross.pl
new file mode 100644
index 0000000000..21734f3e52
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/cross.pl
@@ -0,0 +1,141 @@
+#! /usr/bin/perl
+
+# Copyright (C) 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# Usage: cross.pl [filename.o]...
+#
+# Produce an annotated disassembly of the given object files, indicating
+# certain code alignment and addressing mode problems afflicting K6 chips.
+# "ZZ" is used on all annotations, so this can be searched for.
+#
+# With no arguments, all .o files corresponding to .asm files are processed.
+# This is good in the mpn object directory of a k6*-*-* build.
+#
+# As far as fixing problems goes, any cache line crossing problems in loops
+# get attention, but as a rule it's too tedious to rearrange code or slip in
+# nops to fix every problem in setup or finishup code.
+#
+# Bugs:
+#
+# Instructions without mod/rm bytes or which are already vector decoded are
+# unaffected by cache line boundary crossing, but not all of these have yet
+# been put in as exceptions. All that occur in practice in GMP are present
+# though.
+#
+# There's no messages for using the vector decoded addressing mode (%esi),
+# but that mode is easy to avoid when coding.
+
+use strict;
+
+sub disassemble {
+ my ($file) = @_;
+ my ($addr,$b1,$b2,$b3, $prefix,$opcode,$modrm);
+
+ open (IN, "objdump -Srfh $file |")
+ || die "Cannot open pipe from objdump\n";
+ while (<IN>) {
+ print;
+
+ if (/^[ \t]*[0-9]+[ \t]+\.text[ \t]/ && /2\*\*([0-9]+)$/) {
+ if ($1 < 5) {
+ print "ZZ need at least 2**5 for predictable cache line crossing\n";
+ }
+ }
+
+ if (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
+ ($addr,$b1,$b2,$b3) = ($1,$2,$3,$4);
+
+ } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
+ ($addr,$b1,$b2,$b3) = ($1,$2,$3,'');
+
+ } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)/) {
+ ($addr,$b1,$b2,$b3) = ($1,$2,'','');
+
+ } else {
+ next;
+ }
+
+ if ($b1 =~ /0f/) {
+ $prefix = $b1;
+ $opcode = $b2;
+ $modrm = $b3;
+ } else {
+ $prefix = '';
+ $opcode = $b1;
+ $modrm = $b2;
+ }
+
+ # modrm of the form 00-xxx-100 with an 0F prefix is the problem case
+ # for K6 and pre-CXT K6-2
+ if ($prefix =~ /0f/
+ && $opcode !~ /^8/ # jcond disp32
+ && $modrm =~ /^[0-3][4c]/) {
+ print "ZZ ($file) >3 bytes to determine instruction length\n";
+ }
+
+ # with just an opcode, starting 1f mod 20h
+ if ($addr =~ /[13579bdf]f$/
+ && $prefix !~ /0f/
+ && $opcode !~ /1[012345]/ # adc
+ && $opcode !~ /1[89abcd]/ # sbb
+ && $opcode !~ /68/ # push $imm32
+ && $opcode !~ /^7/ # jcond disp8
+ && $opcode !~ /a[89]/ # test+imm
+ && $opcode !~ /a[a-f]/ # stos/lods/scas
+ && $opcode !~ /b8/ # movl $imm32,%eax
+ && $opcode !~ /e[0123]/ # loop/loopz/loopnz/jcxz
+ && $opcode !~ /e[b9]/ # jmp disp8/disp32
+ && $opcode !~ /f[89abcd]/ # clc,stc,cli,sti,cld,std
+ && !($opcode =~ /f[67]/ # grp 1
+ && $modrm =~ /^[2367abef]/) # mul, imul, div, idiv
+ && $modrm !~ /^$/) {
+ print "ZZ ($file) opcode/modrm cross 32-byte boundary\n";
+ }
+
+ # with an 0F prefix, anything starting at 1f mod 20h
+ if ($addr =~ /[13579bdf][f]$/
+ && $prefix =~ /0f/) {
+ print "ZZ ($file) prefix/opcode cross 32-byte boundary\n";
+ }
+
+ # with an 0F prefix, anything with mod/rm starting at 1e mod 20h
+ if ($addr =~ /[13579bdf][e]$/
+ && $prefix =~ /0f/
+ && $opcode !~ /^8/ # jcond disp32
+ && $modrm !~ /^$/) {
+ print "ZZ ($file) prefix/opcode/modrm cross 32-byte boundary\n";
+ }
+ }
+ close IN || die "Error from objdump (or objdump not available)\n";
+}
+
+
+my @files;
+if ($#ARGV >= 0) {
+ @files = @ARGV;
+} else {
+ @files = glob "*.asm";
+ map {s/.asm/.o/} @files;
+}
+
+foreach (@files) {
+ disassemble($_);
+}
diff --git a/rts/gmp/mpn/x86/k6/diveby3.asm b/rts/gmp/mpn/x86/k6/diveby3.asm
new file mode 100644
index 0000000000..ffb97bc380
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/diveby3.asm
@@ -0,0 +1,110 @@
+dnl AMD K6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl K6: 11.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+C
+C Using %esi in (%esi,%ecx,4) or 0(%esi,%ecx,4) addressing modes doesn't
+C lead to vector decoding, unlike plain (%esi) does.
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %esi defframe_pushl(SAVE_ESI)
+
+ movl PARAM_SRC, %esi
+ pushl %edi defframe_pushl(SAVE_EDI)
+
+ movl PARAM_DST, %edi
+ pushl %ebx defframe_pushl(SAVE_EBX)
+
+ movl PARAM_CARRY, %ebx
+ leal (%esi,%ecx,4), %esi
+
+ pushl $3 defframe_pushl(VAR_THREE)
+ leal (%edi,%ecx,4), %edi
+
+ negl %ecx
+
+
+ C Need 32 alignment for claimed speed, to avoid the movl store
+ C opcode/modrm crossing a cache line boundary
+
+ ALIGN(32)
+L(top):
+ C eax scratch, low product
+ C ebx carry limb (0 to 3)
+ C ecx counter, limbs, negative
+ C edx scratch, high product
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp
+ C
+ C The 0(%esi,%ecx,4) form pads so the finishup "movl %ebx, %eax"
+ C doesn't cross a 32 byte boundary, saving a couple of cycles
+ C (that's a fixed couple, not per loop).
+
+Zdisp( movl, 0,(%esi,%ecx,4), %eax)
+ subl %ebx, %eax
+
+ setc %bl
+
+ imull $INVERSE_3, %eax
+
+ movl %eax, (%edi,%ecx,4)
+ addl $2, %ecx
+
+ mull VAR_THREE
+
+ addl %edx, %ebx
+ loop L(top)
+
+
+ movl SAVE_ESI, %esi
+ movl %ebx, %eax
+
+ movl SAVE_EBX, %ebx
+
+ movl SAVE_EDI, %edi
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/gmp-mparam.h b/rts/gmp/mpn/x86/k6/gmp-mparam.h
new file mode 100644
index 0000000000..77f3948d77
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/gmp-mparam.h
@@ -0,0 +1,97 @@
+/* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 3 /* cycles */
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME 20 /* cycles */
+#endif
+
+/* bsfl takes 12-27 cycles, put an average for uniform random numbers */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 14 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-04. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 18
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 130
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 34
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 116
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 68
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 98
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 13
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 67
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 472
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 4352
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 544
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 4352
+#endif
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm b/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm
new file mode 100644
index 0000000000..20a33e6ccf
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm
@@ -0,0 +1,179 @@
+dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing.
+dnl
+dnl K6-2: 0.56 or 1.0 cycles/limb (at 32 limbs/loop), depending on data
+dnl alignment.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6-2 aligned:
+dnl UNROLL_COUNT cycles/limb
+dnl 8 0.75
+dnl 16 0.625
+dnl 32 0.5625
+dnl 64 0.53
+dnl Maximum possible with the current code is 64, the minimum is 2.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, processing limbs from high to low addresses.
+C
+C The comments in copyi.asm apply here too.
+
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_copyd)
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ std
+
+ movl PARAM_DST, %edi
+ cmpl $UNROLL_COUNT, %ecx
+
+ leal -4(%esi,%ecx,4), %esi
+
+ leal -4(%edi,%ecx,4), %edi
+ ja L(unroll)
+
+L(simple):
+ rep
+ movsl
+
+ cld
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+
+L(unroll):
+ C if src and dst are different alignments mod8, then use rep movs
+ C if src and dst are both 4mod8 then process one limb to get 0mod8
+
+ pushl %ebx
+ leal (%esi,%edi), %ebx
+
+ testb $4, %bl
+ popl %ebx
+
+ jnz L(simple)
+ testl $4, %esi
+
+ leal -UNROLL_COUNT(%ecx), %ecx
+ jnz L(already_aligned)
+
+ movsl
+
+ decl %ecx
+L(already_aligned):
+
+
+ifelse(UNROLL_BYTES,256,`
+ subl $128, %esi
+ subl $128, %edi
+')
+
+ C offset 0x3D here, but gets full speed without further alignment
+L(top):
+ C eax saved esi
+ C ebx
+ C ecx counter, limbs
+ C edx saved edi
+ C esi src, incrementing
+ C edi dst, incrementing
+ C ebp
+ C
+ C `disp' is never 0, so don't need to force 0(%esi).
+
+deflit(CHUNK_COUNT, 2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp', eval(-4-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,+128)))
+ movq disp(%esi), %mm0
+ movq %mm0, disp(%edi)
+')
+
+ leal -UNROLL_BYTES(%esi), %esi
+ subl $UNROLL_COUNT, %ecx
+
+ leal -UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+ C now %ecx is -UNROLL_COUNT to -1 representing repectively 0 to
+ C UNROLL_COUNT-1 limbs remaining
+
+ testb $eval(UNROLL_COUNT/2), %cl
+
+ leal UNROLL_COUNT(%ecx), %ecx
+ jz L(not_half)
+
+
+ C at an unroll count of 32 this block of code is 16 cycles faster than
+ C the rep movs, less 3 or 4 to test whether to do it
+
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT/2-1, `
+ deflit(`disp', eval(-4-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,+128)))
+ movq disp(%esi), %mm0
+ movq %mm0, disp(%edi)
+')
+
+ subl $eval(UNROLL_BYTES/2), %esi
+ subl $eval(UNROLL_BYTES/2), %edi
+
+ subl $eval(UNROLL_COUNT/2), %ecx
+L(not_half):
+
+
+ifelse(UNROLL_BYTES,256,`
+ addl $128, %esi
+ addl $128, %edi
+')
+
+ rep
+ movsl
+
+ cld
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm b/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm
new file mode 100644
index 0000000000..215d805f2e
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm
@@ -0,0 +1,196 @@
+dnl AMD K6-2 mpn_copyi -- copy limb vector, incrementing.
+dnl
+dnl K6-2: 0.56 or 1.0 cycles/limb (at 32 limbs/loop), depending on data
+dnl alignment.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6-2 aligned:
+dnl UNROLL_COUNT cycles/limb
+dnl 8 0.75
+dnl 16 0.625
+dnl 32 0.5625
+dnl 64 0.53
+dnl Maximum possible with the current code is 64, the minimum is 2.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The MMX loop is faster than a rep movs when src and dst are both 0mod8.
+C With one 0mod8 and one 4mod8 it's 1.056 c/l and the rep movs at 1.0 c/l is
+C used instead.
+C
+C mod8
+C src dst
+C 0 0 both aligned, use mmx
+C 0 4 unaligned, use rep movs
+C 4 0 unaligned, use rep movs
+C 4 4 do one movs, then both aligned, use mmx
+C
+C The MMX code on aligned data is 0.5 c/l, plus loop overhead of 2
+C cycles/loop, which is 0.0625 c/l at 32 limbs/loop.
+C
+C A pattern of two movq loads and two movq stores (or four and four) was
+C tried, but found to be the same speed as just one of each.
+C
+C Note that this code only suits K6-2 and K6-3. Plain K6 does only one mmx
+C instruction per cycle, so "movq"s are no faster than the simple 1 c/l rep
+C movs.
+C
+C Enhancement:
+C
+C Addressing modes like disp(%esi,%ecx,4) aren't currently used. They'd
+C make it possible to avoid incrementing %esi and %edi in the loop and hence
+C get loop overhead down to 1 cycle. Care would be needed to avoid bad
+C cache line crossings since the "movq"s would then be 5 code bytes rather
+C than 4.
+
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_copyi)
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ cld
+
+ movl PARAM_DST, %edi
+ cmpl $UNROLL_COUNT, %ecx
+
+ ja L(unroll)
+
+L(simple):
+ rep
+ movsl
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+
+L(unroll):
+ C if src and dst are different alignments mod8, then use rep movs
+ C if src and dst are both 4mod8 then process one limb to get 0mod8
+
+ pushl %ebx
+ leal (%esi,%edi), %ebx
+
+ testb $4, %bl
+ popl %ebx
+
+ jnz L(simple)
+ testl $4, %esi
+
+ leal -UNROLL_COUNT(%ecx), %ecx
+ jz L(already_aligned)
+
+ decl %ecx
+
+ movsl
+L(already_aligned):
+
+
+ifelse(UNROLL_BYTES,256,`
+ addl $128, %esi
+ addl $128, %edi
+')
+
+ C this is offset 0x34, no alignment needed
+L(top):
+ C eax saved esi
+ C ebx
+ C ecx counter, limbs
+ C edx saved edi
+ C esi src, incrementing
+ C edi dst, incrementing
+ C ebp
+ C
+ C Zdisp gets 0(%esi) left that way to avoid vector decode, and with
+ C 0(%edi) keeps code aligned to 16 byte boundaries.
+
+deflit(CHUNK_COUNT, 2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+Zdisp( movq, disp,(%esi), %mm0)
+Zdisp( movq, %mm0, disp,(%edi))
+')
+
+ addl $UNROLL_BYTES, %esi
+ subl $UNROLL_COUNT, %ecx
+
+ leal UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+ C now %ecx is -UNROLL_COUNT to -1 representing repectively 0 to
+ C UNROLL_COUNT-1 limbs remaining
+
+ testb $eval(UNROLL_COUNT/2), %cl
+
+ leal UNROLL_COUNT(%ecx), %ecx
+ jz L(not_half)
+
+ C at an unroll count of 32 this block of code is 16 cycles faster than
+ C the rep movs, less 3 or 4 to test whether to do it
+
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT/2-1, `
+ deflit(`disp', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ movq disp(%esi), %mm0
+ movq %mm0, disp(%edi)
+')
+ addl $eval(UNROLL_BYTES/2), %esi
+ addl $eval(UNROLL_BYTES/2), %edi
+
+ subl $eval(UNROLL_COUNT/2), %ecx
+L(not_half):
+
+
+ifelse(UNROLL_BYTES,256,`
+ subl $128, %esi
+ subl $128, %edi
+')
+
+ rep
+ movsl
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm b/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm
new file mode 100644
index 0000000000..f6d54f97a8
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm
@@ -0,0 +1,286 @@
+dnl AMD K6-2 mpn_lshift -- mpn left shift.
+dnl
+dnl K6-2: 1.75 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl used after src has been fetched
+define(VAR_RETVAL,`PARAM_SRC')
+
+dnl minimum 9, because unrolled loop can't handle less
+deflit(UNROLL_THRESHOLD, 9)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shldl( %cl, %edx, %eax) C return value
+
+ shll %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16) C avoid offset 0x1f
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx,%eax,4), %edx C src high limb
+ negl %ecx
+
+ movd PARAM_SHIFT, %mm6
+ addl $32, %ecx C 32-shift
+
+ shrl %cl, %edx
+ cmpl $UNROLL_THRESHOLD-1, %eax
+
+ movl %edx, VAR_RETVAL
+ jae L(unroll)
+
+
+ movd %ecx, %mm7
+ movl %eax, %ecx
+
+ movl PARAM_DST, %eax
+
+L(simple):
+ C eax dst
+ C ebx src
+ C ecx counter, size-1 to 1
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%ebx,%ecx,4), %mm0
+
+ psrlq %mm7, %mm0
+
+Zdisp( movd, %mm0, 0,(%eax,%ecx,4))
+ loop L(simple)
+
+
+ movd (%ebx), %mm0
+ popl %ebx
+
+ psllq %mm6, %mm0
+
+ movd %mm0, (%eax)
+ movl %edx, %eax
+
+ femms
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx src
+ C ecx 32-shift
+ C edx retval (but instead VAR_RETVAL is used)
+ C
+ C mm6 shift
+
+ addl $32, %ecx
+ movl PARAM_DST, %edx
+
+ movd %ecx, %mm7
+ subl $7, %eax C size-8
+
+ leal (%edx,%eax,4), %ecx C alignment of dst
+
+ movq 32-8(%ebx,%eax,4), %mm2 C src high qword
+ testb $4, %cl
+
+ jz L(dst_aligned)
+ psllq %mm6, %mm2
+
+ psrlq $32, %mm2
+ decl %eax
+
+ movd %mm2, 32(%edx,%eax,4) C dst high limb
+ movq 32-8(%ebx,%eax,4), %mm2 C new src high qword
+L(dst_aligned):
+
+ movq 32-16(%ebx,%eax,4), %mm0 C src second highest qword
+
+
+ C This loop is the important bit, the rest is just support for it.
+ C Four src limbs are held at the start, and four more will be read.
+ C Four dst limbs will be written. This schedule seems necessary for
+ C full speed.
+ C
+ C The use of size-8 lets the loop stop when %eax goes negative and
+ C leaves -4 to -1 which can be tested with test $1 and $2.
+
+L(top):
+ C eax counter, size-8 step by -4 until <0
+ C ebx src
+ C ecx
+ C edx dst
+ C
+ C mm0 src next qword
+ C mm1 scratch
+ C mm2 src prev qword
+ C mm6 shift
+ C mm7 64-shift
+
+ psllq %mm6, %mm2
+ subl $4, %eax
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm0, %mm2
+ movq 24(%ebx,%eax,4), %mm0
+
+ psllq %mm6, %mm1
+ movq %mm2, 40(%edx,%eax,4)
+
+ movq %mm0, %mm2
+ psrlq %mm7, %mm0
+
+ por %mm0, %mm1
+ movq 16(%ebx,%eax,4), %mm0
+
+ movq %mm1, 32(%edx,%eax,4)
+ jnc L(top)
+
+
+ C Now have four limbs in mm2 (prev) and mm0 (next), plus eax mod 4.
+ C
+ C 8(%ebx) is the next source, and 24(%edx) is the next destination.
+ C %eax is between -4 and -1, representing respectively 0 to 3 extra
+ C limbs that must be read.
+
+
+ testl $2, %eax C testl to avoid bad cache line crossing
+ jz L(finish_nottwo)
+
+ C Two more limbs: lshift mm2, OR it with rshifted mm0, mm0 becomes
+ C new mm2 and a new mm0 is loaded.
+
+ psllq %mm6, %mm2
+ movq %mm0, %mm1
+
+ psrlq %mm7, %mm0
+ subl $2, %eax
+
+ por %mm0, %mm2
+ movq 16(%ebx,%eax,4), %mm0
+
+ movq %mm2, 32(%edx,%eax,4)
+ movq %mm1, %mm2
+L(finish_nottwo):
+
+
+ C lshift mm2, OR with rshifted mm0, mm1 becomes lshifted mm0
+
+ testb $1, %al
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm0, %mm2
+ psllq %mm6, %mm1
+
+ movq %mm2, 24(%edx,%eax,4)
+ jz L(finish_even)
+
+
+ C Size is odd, so mm1 and one extra limb to process.
+
+ movd (%ebx), %mm0 C src[0]
+ popl %ebx
+deflit(`FRAME',0)
+
+ movq %mm0, %mm2
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ psllq %mm6, %mm2
+ por %mm0, %mm1
+
+ movq %mm1, 4(%edx) C dst[1,2]
+ movd %mm2, (%edx) C dst[0]
+
+ movl VAR_RETVAL, %eax
+
+ femms
+ ret
+
+
+ nop C avoid bad cache line crossing
+L(finish_even):
+deflit(`FRAME',4)
+ C Size is even, so only mm1 left to process.
+
+ movq %mm1, (%edx) C dst[0,1]
+ movl VAR_RETVAL, %eax
+
+ popl %ebx
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm b/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm
new file mode 100644
index 0000000000..8a8c144241
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm
@@ -0,0 +1,285 @@
+dnl AMD K6-2 mpn_rshift -- mpn right shift.
+dnl
+dnl K6-2: 1.75 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl Minimum 9, because the unrolled loop can't handle less.
+dnl
+deflit(UNROLL_THRESHOLD, 9)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shrdl( %cl, %edx, %eax) C return value
+
+ shrl %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16) C avoid offset 0x1f
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx), %edx C src low limb
+ negl %ecx
+
+ addl $32, %ecx
+ movd PARAM_SHIFT, %mm6
+
+ shll %cl, %edx
+ cmpl $UNROLL_THRESHOLD-1, %eax
+
+ jae L(unroll)
+
+
+ C eax size-1
+ C ebx src
+ C ecx 32-shift
+ C edx retval
+ C
+ C mm6 shift
+
+ movl PARAM_DST, %ecx
+ leal (%ebx,%eax,4), %ebx
+
+ leal -4(%ecx,%eax,4), %ecx
+ negl %eax
+
+ C This loop runs at about 3 cycles/limb, which is the amount of
+ C decoding, and this is despite every second access being unaligned.
+
+L(simple):
+ C eax counter, -(size-1) to -1
+ C ebx &src[size-1]
+ C ecx &dst[size-1]
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+
+Zdisp( movq, 0,(%ebx,%eax,4), %mm0)
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+Zdisp( movd, %mm0, 0,(%ecx,%eax,4))
+ jnz L(simple)
+
+
+ movq %mm0, (%ecx)
+ movl %edx, %eax
+
+ popl %ebx
+
+ femms
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx src
+ C ecx 32-shift
+ C edx retval
+ C
+ C mm6 shift
+
+ addl $32, %ecx
+ subl $7, %eax C size-8
+
+ movd %ecx, %mm7
+ movl PARAM_DST, %ecx
+
+ movq (%ebx), %mm2 C src low qword
+ leal (%ebx,%eax,4), %ebx C src end - 32
+
+ testb $4, %cl
+ leal (%ecx,%eax,4), %ecx C dst end - 32
+
+ notl %eax C -(size-7)
+ jz L(dst_aligned)
+
+ psrlq %mm6, %mm2
+ incl %eax
+
+Zdisp( movd, %mm2, 0,(%ecx,%eax,4)) C dst low limb
+ movq 4(%ebx,%eax,4), %mm2 C new src low qword
+L(dst_aligned):
+
+ movq 12(%ebx,%eax,4), %mm0 C src second lowest qword
+ nop C avoid bad cache line crossing
+
+
+ C This loop is the important bit, the rest is just support for it.
+ C Four src limbs are held at the start, and four more will be read.
+ C Four dst limbs will be written. This schedule seems necessary for
+ C full speed.
+ C
+ C The use of -(size-7) lets the loop stop when %eax becomes >= 0 and
+ C and leaves 0 to 3 which can be tested with test $1 and $2.
+
+L(top):
+ C eax counter, -(size-7) step by +4 until >=0
+ C ebx src end - 32
+ C ecx dst end - 32
+ C edx retval
+ C
+ C mm0 src next qword
+ C mm1 scratch
+ C mm2 src prev qword
+ C mm6 shift
+ C mm7 64-shift
+
+ psrlq %mm6, %mm2
+ addl $4, %eax
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ por %mm0, %mm2
+ movq 4(%ebx,%eax,4), %mm0
+
+ psrlq %mm6, %mm1
+ movq %mm2, -12(%ecx,%eax,4)
+
+ movq %mm0, %mm2
+ psllq %mm7, %mm0
+
+ por %mm0, %mm1
+ movq 12(%ebx,%eax,4), %mm0
+
+ movq %mm1, -4(%ecx,%eax,4)
+ ja L(top) C jump if no carry and not zero
+
+
+
+ C Now have the four limbs in mm2 (low) and mm0 (high), and %eax is 0
+ C to 3 representing respectively 3 to 0 further limbs.
+
+ testl $2, %eax C testl to avoid bad cache line crossings
+ jnz L(finish_nottwo)
+
+ C Two or three extra limbs: rshift mm2, OR it with lshifted mm0, mm0
+ C becomes new mm2 and a new mm0 is loaded.
+
+ psrlq %mm6, %mm2
+ movq %mm0, %mm1
+
+ psllq %mm7, %mm0
+ addl $2, %eax
+
+ por %mm0, %mm2
+ movq 12(%ebx,%eax,4), %mm0
+
+ movq %mm2, -4(%ecx,%eax,4)
+ movq %mm1, %mm2
+L(finish_nottwo):
+
+
+ testb $1, %al
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ por %mm0, %mm2
+ psrlq %mm6, %mm1
+
+ movq %mm2, 4(%ecx,%eax,4)
+ jnz L(finish_even)
+
+
+ C one further extra limb to process
+
+ movd 32-4(%ebx), %mm0 C src[size-1], most significant limb
+ popl %ebx
+
+ movq %mm0, %mm2
+ psllq %mm7, %mm0
+
+ por %mm0, %mm1
+ psrlq %mm6, %mm2
+
+ movq %mm1, 32-12(%ecx) C dst[size-3,size-2]
+ movd %mm2, 32-4(%ecx) C dst[size-1]
+
+ movl %edx, %eax C retval
+
+ femms
+ ret
+
+
+ nop C avoid bad cache line crossing
+L(finish_even):
+ C no further extra limbs
+
+ movq %mm1, 32-8(%ecx) C dst[size-2,size-1]
+ movl %edx, %eax C retval
+
+ popl %ebx
+
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/com_n.asm b/rts/gmp/mpn/x86/k6/mmx/com_n.asm
new file mode 100644
index 0000000000..8915080f0f
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/com_n.asm
@@ -0,0 +1,91 @@
+dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement.
+dnl
+dnl alignment dst/src, A=0mod8 N=4mod8
+dnl A/A A/N N/A N/N
+dnl K6-2 1.0 1.18 1.18 1.18 cycles/limb
+dnl K6 1.5 1.85 1.75 1.85
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Take the bitwise ones-complement of src,size and write it to dst,size.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_com_n)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %edx
+ shrl %ecx
+ jnz L(two_or_more)
+
+ movl (%eax), %eax
+ notl %eax
+ movl %eax, (%edx)
+ ret
+
+
+L(two_or_more):
+ pushl %ebx
+FRAME_pushl()
+ movl %ecx, %ebx
+
+ pcmpeqd %mm7, %mm7 C all ones
+
+
+ ALIGN(16)
+L(top):
+ C eax src
+ C ebx floor(size/2)
+ C ecx counter
+ C edx dst
+ C esi
+ C edi
+ C ebp
+
+ movq -8(%eax,%ecx,8), %mm0
+ pxor %mm7, %mm0
+ movq %mm0, -8(%edx,%ecx,8)
+ loop L(top)
+
+
+ jnc L(no_extra)
+ movl (%eax,%ebx,8), %eax
+ notl %eax
+ movl %eax, (%edx,%ebx,8)
+L(no_extra):
+
+ popl %ebx
+ emms_or_femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/logops_n.asm b/rts/gmp/mpn/x86/k6/mmx/logops_n.asm
new file mode 100644
index 0000000000..46cb3b7ea5
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/logops_n.asm
@@ -0,0 +1,212 @@
+dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
+dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
+dnl
+dnl alignment dst/src1/src2, A=0mod8, N=4mod8
+dnl A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
+dnl
+dnl K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor
+dnl K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor
+dnl K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior
+dnl
+dnl K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor
+dnl K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor
+dnl K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl M4_p and M4_i are the MMX and integer instructions
+dnl M4_*_neg_dst means whether to negate the final result before writing
+dnl M4_*_neg_src2 means whether to negate the src2 values before using them
+
+define(M4_choose_op,
+m4_assert_numargs(7)
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_operation', `$1')
+define(`M4_p', `$2')
+define(`M4_p_neg_dst', `$3')
+define(`M4_p_neg_src2',`$4')
+define(`M4_i', `$5')
+define(`M4_i_neg_dst', `$6')
+define(`M4_i_neg_src2',`$7')
+')')
+
+dnl xnor is done in "iorn" style because it's a touch faster than "nior"
+dnl style (the two are equivalent for xor).
+
+M4_choose_op( and_n, pand,0,0, andl,0,0)
+M4_choose_op( andn_n, pandn,0,0, andl,0,1)
+M4_choose_op( nand_n, pand,1,0, andl,1,0)
+M4_choose_op( ior_n, por,0,0, orl,0,0)
+M4_choose_op( iorn_n, por,0,1, orl,0,1)
+M4_choose_op( nior_n, por,1,0, orl,1,0)
+M4_choose_op( xor_n, pxor,0,0, xorl,0,0)
+M4_choose_op( xnor_n, pxor,0,1, xorl,0,1)
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+
+C void M4_function (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C
+C Do src1,size M4_operation src2,size, storing the result in dst,size.
+C
+C Unaligned movq loads and stores are a bit slower than aligned ones. The
+C test at the start of the routine checks the alignment of src1 and if
+C necessary processes one limb separately at the low end to make it aligned.
+C
+C The raw speeds without this alignment switch are as follows.
+C
+C alignment dst/src1/src2, A=0mod8, N=4mod8
+C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
+C
+C K6 1.5 2.0 1.5 2.0 and,andn,ior,xor
+C K6 1.75 2.2 2.0 2.28 iorn,xnor
+C K6 2.0 2.25 2.35 2.28 nand,nior
+C
+C
+C Future:
+C
+C K6 can do one 64-bit load per cycle so each of these routines should be
+C able to approach 1.0 c/l, if aligned. The basic and/andn/ior/xor might be
+C able to get 1.0 with just a 4 limb loop, being 3 instructions per 2 limbs.
+C The others are 4 instructions per 2 limbs, and so can only approach 1.0
+C because there's nowhere to hide some loop control.
+
+defframe(PARAM_SIZE,16)
+defframe(PARAM_SRC2,12)
+defframe(PARAM_SRC1,8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+PROLOGUE(M4_function)
+ movl PARAM_SIZE, %ecx
+ pushl %ebx
+ FRAME_pushl()
+ movl PARAM_SRC1, %eax
+ movl PARAM_SRC2, %ebx
+ cmpl $1, %ecx
+ movl PARAM_DST, %edx
+ ja L(two_or_more)
+
+
+ movl (%ebx), %ecx
+ popl %ebx
+ifelse(M4_i_neg_src2,1,`notl %ecx')
+ M4_i (%eax), %ecx
+ifelse(M4_i_neg_dst,1,` notl %ecx')
+ movl %ecx, (%edx)
+
+ ret
+
+
+L(two_or_more):
+ C eax src1
+ C ebx src2
+ C ecx size
+ C edx dst
+ C esi
+ C edi
+ C ebp
+ C
+ C carry bit is low of size
+
+ pushl %esi
+ FRAME_pushl()
+ testl $4, %eax
+ jz L(alignment_ok)
+
+ movl (%ebx), %esi
+ addl $4, %ebx
+ifelse(M4_i_neg_src2,1,`notl %esi')
+ M4_i (%eax), %esi
+ addl $4, %eax
+ifelse(M4_i_neg_dst,1,` notl %esi')
+ movl %esi, (%edx)
+ addl $4, %edx
+ decl %ecx
+
+L(alignment_ok):
+ movl %ecx, %esi
+ shrl %ecx
+ jnz L(still_two_or_more)
+
+ movl (%ebx), %ecx
+ popl %esi
+ifelse(M4_i_neg_src2,1,`notl %ecx')
+ M4_i (%eax), %ecx
+ifelse(M4_i_neg_dst,1,` notl %ecx')
+ popl %ebx
+ movl %ecx, (%edx)
+ ret
+
+
+L(still_two_or_more):
+ifelse(eval(M4_p_neg_src2 || M4_p_neg_dst),1,`
+ pcmpeqd %mm7, %mm7 C all ones
+')
+
+ ALIGN(16)
+L(top):
+ C eax src1
+ C ebx src2
+ C ecx counter
+ C edx dst
+ C esi
+ C edi
+ C ebp
+ C
+ C carry bit is low of size
+
+ movq -8(%ebx,%ecx,8), %mm0
+ifelse(M4_p_neg_src2,1,`pxor %mm7, %mm0')
+ M4_p -8(%eax,%ecx,8), %mm0
+ifelse(M4_p_neg_dst,1,` pxor %mm7, %mm0')
+ movq %mm0, -8(%edx,%ecx,8)
+
+ loop L(top)
+
+
+ jnc L(no_extra)
+
+ movl -4(%ebx,%esi,4), %ebx
+ifelse(M4_i_neg_src2,1,`notl %ebx')
+ M4_i -4(%eax,%esi,4), %ebx
+ifelse(M4_i_neg_dst,1,` notl %ebx')
+ movl %ebx, -4(%edx,%esi,4)
+L(no_extra):
+
+ popl %esi
+ popl %ebx
+ emms_or_femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/lshift.asm b/rts/gmp/mpn/x86/k6/mmx/lshift.asm
new file mode 100644
index 0000000000..f1dc83db46
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/lshift.asm
@@ -0,0 +1,122 @@
+dnl AMD K6 mpn_lshift -- mpn left shift.
+dnl
+dnl K6: 3.0 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx
+C instructions. This is despite every second fetch being unaligned.
+
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shldl( %cl, %edx, %eax) C return value
+
+ shll %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+ ALIGN(16) C avoid offset 0x1f
+ nop C avoid bad cache line crossing
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx,%eax,4), %edx C src high limb
+ negl %ecx
+
+ movd PARAM_SHIFT, %mm6
+ addl $32, %ecx C 32-shift
+
+ shrl %cl, %edx
+
+ movd %ecx, %mm7
+ movl PARAM_DST, %ecx
+
+L(top):
+ C eax counter, size-1 to 1
+ C ebx src
+ C ecx dst
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%ebx,%eax,4), %mm0
+ decl %eax
+
+ psrlq %mm7, %mm0
+
+ movd %mm0, 4(%ecx,%eax,4)
+ jnz L(top)
+
+
+ movd (%ebx), %mm0
+ popl %ebx
+
+ psllq %mm6, %mm0
+ movl %edx, %eax
+
+ movd %mm0, (%ecx)
+
+ emms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/popham.asm b/rts/gmp/mpn/x86/k6/mmx/popham.asm
new file mode 100644
index 0000000000..2c619252bb
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/popham.asm
@@ -0,0 +1,238 @@
+dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and
+dnl hamming distance.
+dnl
+dnl popcount hamdist
+dnl K6-2: 9.0 11.5 cycles/limb
+dnl K6: 12.5 13.0
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C The code here isn't optimal, but it's already a 2x speedup over the plain
+C integer mpn/generic/popcount.c,hamdist.c.
+
+
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist
+')m4exit(1)')')
+
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+
+HAM(`
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+ifdef(`PIC',,`
+ dnl non-PIC
+
+ DATA
+ ALIGN(8)
+
+define(LS,
+m4_assert_numargs(1)
+`LF(M4_function,`$1')')
+
+LS(rodata_AAAAAAAAAAAAAAAA):
+ .long 0xAAAAAAAA
+ .long 0xAAAAAAAA
+
+LS(rodata_3333333333333333):
+ .long 0x33333333
+ .long 0x33333333
+
+LS(rodata_0F0F0F0F0F0F0F0F):
+ .long 0x0F0F0F0F
+ .long 0x0F0F0F0F
+
+LS(rodata_000000FF000000FF):
+ .long 0x000000FF
+ .long 0x000000FF
+')
+
+ .text
+ ALIGN(32)
+
+POP(`ifdef(`PIC', `
+ C avoid shrl crossing a 32-byte boundary
+ nop')')
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ orl %ecx, %ecx
+ jz L(zero)
+
+ifdef(`PIC',`
+ movl $0xAAAAAAAA, %eax
+ movl $0x33333333, %edx
+
+ movd %eax, %mm7
+ movd %edx, %mm6
+
+ movl $0x0F0F0F0F, %eax
+ movl $0x000000FF, %edx
+
+ punpckldq %mm7, %mm7
+ punpckldq %mm6, %mm6
+
+ movd %eax, %mm5
+ movd %edx, %mm4
+
+ punpckldq %mm5, %mm5
+ punpckldq %mm4, %mm4
+',`
+
+ movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7
+ movq LS(rodata_3333333333333333), %mm6
+ movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5
+ movq LS(rodata_000000FF000000FF), %mm4
+')
+
+define(REG_AAAAAAAAAAAAAAAA, %mm7)
+define(REG_3333333333333333, %mm6)
+define(REG_0F0F0F0F0F0F0F0F, %mm5)
+define(REG_000000FF000000FF, %mm4)
+
+
+ movl PARAM_SRC, %eax
+HAM(` movl PARAM_SRC2, %edx')
+
+ pxor %mm2, %mm2 C total
+
+ shrl %ecx
+ jnc L(top)
+
+Zdisp( movd, 0,(%eax,%ecx,8), %mm1)
+
+HAM(`
+Zdisp( movd, 0,(%edx,%ecx,8), %mm0)
+ pxor %mm0, %mm1
+')
+
+ incl %ecx
+ jmp L(loaded)
+
+
+ ALIGN(16)
+POP(` nop C alignment to avoid crossing 32-byte boundaries')
+
+L(top):
+ C eax src
+ C ebx
+ C ecx counter, qwords, decrementing
+ C edx [hamdist] src2
+ C
+ C mm0 (scratch)
+ C mm1 (scratch)
+ C mm2 total (low dword)
+ C mm3
+ C mm4 \
+ C mm5 | special constants
+ C mm6 |
+ C mm7 /
+
+ movq -8(%eax,%ecx,8), %mm1
+HAM(` pxor -8(%edx,%ecx,8), %mm1')
+
+L(loaded):
+ movq %mm1, %mm0
+ pand REG_AAAAAAAAAAAAAAAA, %mm1
+
+ psrlq $1, %mm1
+HAM(` nop C code alignment')
+
+ psubd %mm1, %mm0 C bit pairs
+HAM(` nop C code alignment')
+
+
+ movq %mm0, %mm1
+ psrlq $2, %mm0
+
+ pand REG_3333333333333333, %mm0
+ pand REG_3333333333333333, %mm1
+
+ paddd %mm1, %mm0 C nibbles
+
+
+ movq %mm0, %mm1
+ psrlq $4, %mm0
+
+ pand REG_0F0F0F0F0F0F0F0F, %mm0
+ pand REG_0F0F0F0F0F0F0F0F, %mm1
+
+ paddd %mm1, %mm0 C bytes
+
+ movq %mm0, %mm1
+ psrlq $8, %mm0
+
+
+ paddb %mm1, %mm0 C words
+
+
+ movq %mm0, %mm1
+ psrlq $16, %mm0
+
+ paddd %mm1, %mm0 C dwords
+
+ pand REG_000000FF000000FF, %mm0
+
+ paddd %mm0, %mm2 C low to total
+ psrlq $32, %mm0
+
+ paddd %mm0, %mm2 C high to total
+ loop L(top)
+
+
+
+ movd %mm2, %eax
+ emms_or_femms
+ ret
+
+L(zero):
+ movl $0, %eax
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/rshift.asm b/rts/gmp/mpn/x86/k6/mmx/rshift.asm
new file mode 100644
index 0000000000..cc5948f26c
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/rshift.asm
@@ -0,0 +1,122 @@
+dnl AMD K6 mpn_rshift -- mpn right shift.
+dnl
+dnl K6: 3.0 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx
+C instructions. This is despite every second fetch being unaligned.
+
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shrdl( %cl, %edx, %eax) C return value
+
+ shrl %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+ ALIGN(16) C avoid offset 0x1f
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx), %edx C src low limb
+ negl %ecx
+
+ addl $32, %ecx C 32-shift
+ movd PARAM_SHIFT, %mm6
+
+ shll %cl, %edx C retval
+ movl PARAM_DST, %ecx
+
+ leal (%ebx,%eax,4), %ebx
+
+ leal -4(%ecx,%eax,4), %ecx
+ negl %eax
+
+
+L(simple):
+ C eax counter (negative)
+ C ebx &src[size-1]
+ C ecx &dst[size-1]
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+
+Zdisp( movq, 0,(%ebx,%eax,4), %mm0)
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+Zdisp( movd, %mm0, 0,(%ecx,%eax,4))
+ jnz L(simple)
+
+
+ movq %mm0, (%ecx)
+ movl %edx, %eax
+
+ popl %ebx
+
+ emms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mul_1.asm b/rts/gmp/mpn/x86/k6/mul_1.asm
new file mode 100644
index 0000000000..c2220fe4ca
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mul_1.asm
@@ -0,0 +1,272 @@
+dnl AMD K6 mpn_mul_1 -- mpn by limb multiply.
+dnl
+dnl K6: 6.25 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier, mp_limb_t carry);
+C
+C Multiply src,size by mult and store the result in dst,size.
+C Return the carry limb from the top of the result.
+C
+C mpn_mul_1c() accepts an initial carry for the calculation, it's added into
+C the low limb of the result.
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl minimum 5 because the unrolled code can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_mul_1c)
+ pushl %esi
+deflit(`FRAME',4)
+ movl PARAM_CARRY, %esi
+ jmp LF(mpn_mul_1,start_nc)
+EPILOGUE()
+
+
+PROLOGUE(mpn_mul_1)
+ push %esi
+deflit(`FRAME',4)
+ xorl %esi, %esi C initial carry
+
+L(start_nc):
+ mov PARAM_SIZE, %ecx
+ push %ebx
+FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ push %edi
+FRAME_pushl()
+
+ movl PARAM_DST, %edi
+ pushl %ebp
+FRAME_pushl()
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_MULTIPLIER, %ebp
+
+ jae L(unroll)
+
+
+ C code offset 0x22 here, close enough to aligned
+L(simple):
+ C eax scratch
+ C ebx src
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi dst
+ C ebp multiplier
+ C
+ C this loop 8 cycles/limb
+
+ movl (%ebx), %eax
+ addl $4, %ebx
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi)
+ addl $4, %edi
+
+ loop L(simple)
+
+
+ popl %ebp
+
+ popl %edi
+ popl %ebx
+
+ movl %esi, %eax
+ popl %esi
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C The code for each limb is 6 cycles, with instruction decoding being the
+C limiting factor. At 4 limbs/loop and 1 cycle/loop of overhead it's 6.25
+C cycles/limb in total.
+C
+C The secret ingredient to get 6.25 is to start the loop with the mul and
+C have the load/store pair at the end. Rotating the load/store to the top
+C is an 0.5 c/l slowdown. (Some address generation effect probably.)
+C
+C The whole unrolled loop fits nicely in exactly 80 bytes.
+
+
+ ALIGN(16) C already aligned to 16 here actually
+L(unroll):
+ movl (%ebx), %eax
+ leal -16(%ebx,%ecx,4), %ebx
+
+ leal -16(%edi,%ecx,4), %edi
+ subl $4, %ecx
+
+ negl %ecx
+
+
+ ALIGN(16) C one byte nop for this alignment
+L(top):
+ C eax scratch
+ C ebx &src[size-4]
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi &dst[size-4]
+ C ebp multiplier
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi,%ecx,4)
+ movl 4(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 4(%edi,%ecx,4)
+ movl 8(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 8(%edi,%ecx,4)
+ movl 12(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 12(%edi,%ecx,4)
+ movl 16(%ebx,%ecx,4), %eax
+
+
+ addl $4, %ecx
+ js L(top)
+
+
+
+ C eax next src limb
+ C ebx &src[size-4]
+ C ecx 0 to 3 representing respectively 4 to 1 further limbs
+ C edx
+ C esi carry
+ C edi &dst[size-4]
+
+ testb $2, %cl
+ jnz L(finish_not_two)
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi,%ecx,4)
+ movl 4(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 4(%edi,%ecx,4)
+ movl 8(%ebx,%ecx,4), %eax
+
+ addl $2, %ecx
+L(finish_not_two):
+
+
+ testb $1, %cl
+ jnz L(finish_not_one)
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 8(%edi)
+ movl 12(%ebx), %eax
+L(finish_not_one):
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ popl %ebp
+
+ adcl $0, %edx
+
+ movl %eax, 12(%edi)
+ popl %edi
+
+ popl %ebx
+ movl %edx, %eax
+
+ popl %esi
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mul_basecase.asm b/rts/gmp/mpn/x86/k6/mul_basecase.asm
new file mode 100644
index 0000000000..1f5a3a4b4b
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mul_basecase.asm
@@ -0,0 +1,600 @@
+dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers.
+dnl
+dnl K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop
+dnl unrolling).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6: UNROLL_COUNT cycles/product (approx)
+dnl 8 9.75
+dnl 16 9.3
+dnl 32 9.3
+dnl Maximum possible with the current code is 32.
+dnl
+dnl With 16 the inner unrolled loop fits exactly in a 256 byte block, which
+dnl might explain it's good performance.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+C
+C Calculate xp,xsize multiplied by yp,ysize, storing the result in
+C wp,xsize+ysize.
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() entry code only
+C once. The saving is about 10-20% on typical sizes coming from the
+C Karatsuba multiply code.
+C
+C Future:
+C
+C The unrolled loop could be shared by mpn_addmul_1, with some extra stack
+C setups and maybe 2 or 3 wasted cycles at the end. Code saving would be
+C 256 bytes.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 8)
+',`
+deflit(UNROLL_THRESHOLD, 8)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_XSIZE, %ecx
+ movl PARAM_YP, %eax
+
+ movl PARAM_XP, %edx
+ movl (%eax), %eax C yp low limb
+
+ cmpl $2, %ecx
+ ja L(xsize_more_than_two_limbs)
+ je L(two_by_something)
+
+
+ C one limb by one limb
+
+ movl (%edx), %edx C xp low limb
+ movl PARAM_WP, %ecx
+
+ mull %edx
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+ decl PARAM_YSIZE
+ pushl %ebx
+deflit(`FRAME',4)
+
+ movl PARAM_WP, %ebx
+ pushl %esi
+deflit(`FRAME',8)
+
+ movl %eax, %ecx C yp low limb
+ movl (%edx), %eax C xp low limb
+
+ movl %edx, %esi C xp
+ jnz L(two_by_two)
+
+
+ C two limbs by one limb
+
+ mull %ecx
+
+ movl %eax, (%ebx)
+ movl 4(%esi), %eax
+
+ movl %edx, %esi C carry
+
+ mull %ecx
+
+ addl %eax, %esi
+ movl %esi, 4(%ebx)
+
+ adcl $0, %edx
+
+ movl %edx, 8(%ebx)
+ popl %esi
+
+ popl %ebx
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(two_by_two):
+ C eax xp low limb
+ C ebx wp
+ C ecx yp low limb
+ C edx
+ C esi xp
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ mull %ecx C xp[0] * yp[0]
+
+ push %edi
+deflit(`FRAME',12)
+ movl %eax, (%ebx)
+
+ movl 4(%esi), %eax
+ movl %edx, %edi C carry, for wp[1]
+
+ mull %ecx C xp[1] * yp[0]
+
+ addl %eax, %edi
+ movl PARAM_YP, %ecx
+
+ adcl $0, %edx
+
+ movl %edi, 4(%ebx)
+ movl 4(%ecx), %ecx C yp[1]
+
+ movl 4(%esi), %eax C xp[1]
+ movl %edx, %edi C carry, for wp[2]
+
+ mull %ecx C xp[1] * yp[1]
+
+ addl %eax, %edi
+
+ adcl $0, %edx
+
+ movl (%esi), %eax C xp[0]
+ movl %edx, %esi C carry, for wp[3]
+
+ mull %ecx C xp[0] * yp[1]
+
+ addl %eax, 4(%ebx)
+ adcl %edx, %edi
+ adcl $0, %esi
+
+ movl %edi, 8(%ebx)
+ popl %edi
+
+ movl %esi, 12(%ebx)
+ popl %esi
+
+ popl %ebx
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(xsize_more_than_two_limbs):
+
+C The first limb of yp is processed with a simple mpn_mul_1 style loop
+C inline. Unrolling this doesn't seem worthwhile since it's only run once
+C (whereas the addmul below is run ysize-1 many times). A call to the
+C actual mpn_mul_1 will be slowed down by the call and parameter pushing and
+C popping, and doesn't seem likely to be worthwhile on the typical 10-20
+C limb operations the Karatsuba code calls here with.
+
+ C eax yp[0]
+ C ebx
+ C ecx xsize
+ C edx xp
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',0)
+
+ pushl %edi defframe_pushl(SAVE_EDI)
+ pushl %ebp defframe_pushl(SAVE_EBP)
+
+ movl PARAM_WP, %edi
+ pushl %esi defframe_pushl(SAVE_ESI)
+
+ movl %eax, %ebp
+ pushl %ebx defframe_pushl(SAVE_EBX)
+
+ leal (%edx,%ecx,4), %ebx C xp end
+ xorl %esi, %esi
+
+ leal (%edi,%ecx,4), %edi C wp end of mul1
+ negl %ecx
+
+
+L(mul1):
+ C eax scratch
+ C ebx xp end
+ C ecx counter, negative
+ C edx scratch
+ C esi carry
+ C edi wp end of mul1
+ C ebp multiplier
+
+ movl (%ebx,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi,%ecx,4)
+ incl %ecx
+
+ jnz L(mul1)
+
+
+ movl PARAM_YSIZE, %edx
+ movl %esi, (%edi) C final carry
+
+ movl PARAM_XSIZE, %ecx
+ decl %edx
+
+ jnz L(ysize_more_than_one_limb)
+
+ popl %ebx
+ popl %esi
+ popl %ebp
+ popl %edi
+ ret
+
+
+L(ysize_more_than_one_limb):
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_YP, %eax
+
+ jae L(unroll)
+
+
+C -----------------------------------------------------------------------------
+C Simple addmul loop.
+C
+C Using ebx and edi pointing at the ends of their respective locations saves
+C a couple of instructions in the outer loop. The inner loop is still 11
+C cycles, the same as the simple loop in aorsmul_1.asm.
+
+ C eax yp
+ C ebx xp end
+ C ecx xsize
+ C edx ysize-1
+ C esi
+ C edi wp end of mul1
+ C ebp
+
+ movl 4(%eax), %ebp C multiplier
+ negl %ecx
+
+ movl %ecx, PARAM_XSIZE C -xsize
+ xorl %esi, %esi C initial carry
+
+ leal 4(%eax,%edx,4), %eax C yp end
+ negl %edx
+
+ movl %eax, PARAM_YP
+ movl %edx, PARAM_YSIZE
+
+ jmp L(simple_outer_entry)
+
+
+ C aligning here saves a couple of cycles
+ ALIGN(16)
+L(simple_outer_top):
+ C edx ysize counter, negative
+
+ movl PARAM_YP, %eax C yp end
+ xorl %esi, %esi C carry
+
+ movl PARAM_XSIZE, %ecx C -xsize
+ movl %edx, PARAM_YSIZE
+
+ movl (%eax,%edx,4), %ebp C yp limb multiplier
+L(simple_outer_entry):
+ addl $4, %edi
+
+
+L(simple_inner):
+ C eax scratch
+ C ebx xp end
+ C ecx counter, negative
+ C edx scratch
+ C esi carry
+ C edi wp end of this addmul
+ C ebp multiplier
+
+ movl (%ebx,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl $0, %edx
+ addl %eax, (%edi,%ecx,4)
+ adcl %edx, %esi
+
+ incl %ecx
+ jnz L(simple_inner)
+
+
+ movl PARAM_YSIZE, %edx
+ movl %esi, (%edi)
+
+ incl %edx
+ jnz L(simple_outer_top)
+
+
+ popl %ebx
+ popl %esi
+ popl %ebp
+ popl %edi
+ ret
+
+
+C -----------------------------------------------------------------------------
+C Unrolled loop.
+C
+C The unrolled inner loop is the same as in aorsmul_1.asm, see that code for
+C some comments.
+C
+C VAR_COUNTER is for the inner loop, running from VAR_COUNTER_INIT down to
+C 0, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C PARAM_XP and PARAM_WP get offset appropriately for where the unrolled loop
+C is entered.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop. This can't just be fetched through the xp
+C pointer because of the offset applied to it.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C PARAM_WP is similarly offset so that the PARAM_YSIZE counter can be added
+C to give the starting point in the destination for each unrolled loop (this
+C point is one limb upwards for each limb of yp processed).
+C
+C Having PARAM_YSIZE count negative to zero means it's not necessary to
+C store new values of PARAM_YP and PARAM_WP on each loop. Those values on
+C the stack remain constant and on each loop an leal adjusts them with the
+C PARAM_YSIZE counter value.
+
+
+defframe(VAR_COUNTER, -20)
+defframe(VAR_COUNTER_INIT, -24)
+defframe(VAR_JMP, -28)
+defframe(VAR_XP_LOW, -32)
+deflit(VAR_STACK_SPACE, 16)
+
+dnl For some strange reason using (%esp) instead of 0(%esp) is a touch
+dnl slower in this code, hence the defframe empty-if-zero feature is
+dnl disabled.
+dnl
+dnl If VAR_COUNTER is at (%esp), the effect is worse. In this case the
+dnl unrolled loop is 255 instead of 256 bytes, but quite how this affects
+dnl anything isn't clear.
+dnl
+define(`defframe_empty_if_zero_disabled',1)
+
+L(unroll):
+ C eax yp (not used)
+ C ebx xp end (not used)
+ C ecx xsize
+ C edx ysize-1
+ C esi
+ C edi wp end of mul1 (not used)
+ C ebp
+deflit(`FRAME', 16)
+
+ leal -2(%ecx), %ebp C one limb processed at start,
+ decl %ecx C and ebp is one less
+
+ shrl $UNROLL_LOG2, %ebp
+ negl %ecx
+
+ subl $VAR_STACK_SPACE, %esp
+deflit(`FRAME', 16+VAR_STACK_SPACE)
+ andl $UNROLL_MASK, %ecx
+
+ movl %ecx, %esi
+ shll $4, %ecx
+
+ movl %ebp, VAR_COUNTER_INIT
+ negl %esi
+
+ C 15 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(unroll_here):
+',`
+ leal L(unroll_entry) (%ecx,%esi,1), %ecx
+')
+
+ movl PARAM_XP, %ebx
+ movl %ebp, VAR_COUNTER
+
+ movl PARAM_WP, %edi
+ movl %ecx, VAR_JMP
+
+ movl (%ebx), %eax
+ leal 4(%edi,%esi,4), %edi C wp adjust for unrolling and mul1
+
+ leal (%ebx,%esi,4), %ebx C xp adjust for unrolling
+
+ movl %eax, VAR_XP_LOW
+
+ movl %ebx, PARAM_XP
+ movl PARAM_YP, %ebx
+
+ leal (%edi,%edx,4), %ecx C wp adjust for ysize indexing
+ movl 4(%ebx), %ebp C multiplier (yp second limb)
+
+ leal 4(%ebx,%edx,4), %ebx C yp adjust for ysize indexing
+
+ movl %ecx, PARAM_WP
+
+ leal 1(%esi), %ecx C adjust parity for decl %ecx above
+
+ movl %ebx, PARAM_YP
+ negl %edx
+
+ movl %edx, PARAM_YSIZE
+ jmp L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%ecx,%esi,1), %ecx
+ addl $L(unroll_entry)-L(unroll_here), %ecx
+ addl (%esp), %ecx
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ C Aligning here saves a couple of cycles per loop. Using 32 doesn't
+ C cost any extra space, since the inner unrolled loop below is
+ C aligned to 32.
+ ALIGN(32)
+L(unroll_outer_top):
+ C edx ysize
+
+ movl PARAM_YP, %eax
+ movl %edx, PARAM_YSIZE C incremented ysize counter
+
+ movl PARAM_WP, %edi
+
+ movl VAR_COUNTER_INIT, %ebx
+ movl (%eax,%edx,4), %ebp C next multiplier
+
+ movl PARAM_XSIZE, %ecx
+ leal (%edi,%edx,4), %edi C adjust wp for where we are in yp
+
+ movl VAR_XP_LOW, %eax
+ movl %ebx, VAR_COUNTER
+
+L(unroll_outer_entry):
+ mull %ebp
+
+ C using testb is a tiny bit faster than testl
+ testb $1, %cl
+
+ movl %eax, %ecx C low carry
+ movl VAR_JMP, %eax
+
+ movl %edx, %esi C high carry
+ movl PARAM_XP, %ebx
+
+ jnz L(unroll_noswap)
+ movl %ecx, %esi C high,low carry other way around
+
+ movl %edx, %ecx
+L(unroll_noswap):
+
+ jmp *%eax
+
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(unroll_top):
+ C eax scratch
+ C ebx xp
+ C ecx carry low
+ C edx scratch
+ C esi carry high
+ C edi wp
+ C ebp multiplier
+ C VAR_COUNTER loop counter
+ C
+ C 15 code bytes each limb
+
+ leal UNROLL_BYTES(%edi), %edi
+
+L(unroll_entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4))
+ deflit(`disp1', eval(disp0 + 4))
+ deflit(`disp2', eval(disp1 + 4))
+
+ movl disp1(%ebx), %eax
+ mull %ebp
+Zdisp( addl, %ecx, disp0,(%edi))
+ adcl %eax, %esi
+ movl %edx, %ecx
+ jadcl0( %ecx)
+
+ movl disp2(%ebx), %eax
+ mull %ebp
+ addl %esi, disp1(%edi)
+ adcl %eax, %ecx
+ movl %edx, %esi
+ jadcl0( %esi)
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%ebx), %ebx
+
+ jns L(unroll_top)
+
+
+ movl PARAM_YSIZE, %edx
+ addl %ecx, UNROLL_BYTES(%edi)
+
+ adcl $0, %esi
+
+ incl %edx
+ movl %esi, UNROLL_BYTES+4(%edi)
+
+ jnz L(unroll_outer_top)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+
+ addl $FRAME, %esp
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/sqr_basecase.asm b/rts/gmp/mpn/x86/k6/sqr_basecase.asm
new file mode 100644
index 0000000000..70d49b3e57
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/sqr_basecase.asm
@@ -0,0 +1,672 @@
+dnl AMD K6 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular
+dnl product (measured on the speed difference between 17 and 33 limbs,
+dnl which is roughly the Karatsuba recursing range).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl KARATSUBA_SQR_THRESHOLD_MAX is the maximum KARATSUBA_SQR_THRESHOLD this
+dnl code supports. This value is used only by the tune program to know
+dnl what it can go up to. (An attempt to compile with a bigger value will
+dnl trigger some m4_assert()s in the code, making the build fail.)
+dnl
+dnl The value is determined by requiring the displacements in the unrolled
+dnl addmul to fit in single bytes. This means a maximum UNROLL_COUNT of
+dnl 63, giving a maximum KARATSUBA_SQR_THRESHOLD of 66.
+
+deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66)
+
+
+dnl Allow a value from the tune program to override config.m4.
+
+ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',
+`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')
+
+
+dnl UNROLL_COUNT is the number of code chunks in the unrolled addmul. The
+dnl number required is determined by KARATSUBA_SQR_THRESHOLD, since
+dnl mpn_sqr_basecase only needs to handle sizes < KARATSUBA_SQR_THRESHOLD.
+dnl
+dnl The first addmul is the biggest, and this takes the second least
+dnl significant limb and multiplies it by the third least significant and
+dnl up. Hence for a maximum operand size of KARATSUBA_SQR_THRESHOLD-1
+dnl limbs, UNROLL_COUNT needs to be KARATSUBA_SQR_THRESHOLD-3.
+
+m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')
+deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is essentially the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the given size
+C is small.
+C
+C The code size might look a bit excessive, but not all of it is executed
+C and so won't fill up the code cache. The 1x1, 2x2 and 3x3 special cases
+C clearly apply only to those sizes; mid sizes like 10x10 only need part of
+C the unrolled addmul; and big sizes like 35x35 that do need all of it will
+C at least be getting value for money, because 35x35 spends something like
+C 5780 cycles here.
+C
+C Different values of UNROLL_COUNT give slightly different speeds, between
+C 9.0 and 9.2 c/tri-prod measured on the difference between 17 and 33 limbs.
+C This isn't a big difference, but it's presumably some alignment effect
+C which if understood could give a simple speedup.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl PARAM_SRC, %eax
+
+ cmpl $2, %ecx
+ je L(two_limbs)
+
+ movl PARAM_DST, %edx
+ ja L(three_or_more)
+
+
+C -----------------------------------------------------------------------------
+C one limb only
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+
+ movl (%eax), %eax
+ movl %edx, %ecx
+
+ mull %eax
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+
+ pushl %ebx
+ movl %eax, %ebx C src
+deflit(`FRAME',4)
+
+ movl (%ebx), %eax
+ movl PARAM_DST, %ecx
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx)
+ movl 4(%ebx), %eax
+
+ movl %edx, 4(%ecx)
+
+ mull %eax C src[1]^2
+
+ movl %eax, 8(%ecx)
+ movl (%ebx), %eax
+
+ movl %edx, 12(%ecx)
+ movl 4(%ebx), %edx
+
+ mull %edx C src[0]*src[1]
+
+ addl %eax, 4(%ecx)
+
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+
+ popl %ebx
+ addl %eax, 4(%ecx)
+
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(three_or_more):
+deflit(`FRAME',0)
+ cmpl $4, %ecx
+ jae L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+ C eax src
+ C ecx size
+ C edx dst
+
+ pushl %ebx
+ movl %eax, %ebx C src
+
+ movl (%ebx), %eax
+ movl %edx, %ecx C dst
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl 4(%ebx), %eax
+
+ movl %edx, 4(%ecx)
+ pushl %esi
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl 8(%ebx), %eax
+
+ movl %edx, 12(%ecx)
+ pushl %edi
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl (%ebx), %eax
+
+ movl %edx, 20(%ecx)
+ movl 4(%ebx), %edx
+
+ mull %edx C src[0] * src[1]
+
+ movl %eax, %esi
+ movl (%ebx), %eax
+
+ movl %edx, %edi
+ movl 8(%ebx), %edx
+
+ pushl %ebp
+ xorl %ebp, %ebp
+
+ mull %edx C src[0] * src[2]
+
+ addl %eax, %edi
+ movl 4(%ebx), %eax
+
+ adcl %edx, %ebp
+
+ movl 8(%ebx), %edx
+
+ mull %edx C src[1] * src[2]
+
+ addl %eax, %ebp
+
+ adcl $0, %edx
+
+
+ C eax will be dst[5]
+ C ebx
+ C ecx dst
+ C edx dst[4]
+ C esi dst[1]
+ C edi dst[2]
+ C ebp dst[3]
+
+ xorl %eax, %eax
+ addl %esi, %esi
+ adcl %edi, %edi
+ adcl %ebp, %ebp
+ adcl %edx, %edx
+ adcl $0, %eax
+
+ addl %esi, 4(%ecx)
+ adcl %edi, 8(%ecx)
+ adcl %ebp, 12(%ecx)
+
+ popl %ebp
+ popl %edi
+
+ adcl %edx, 16(%ecx)
+
+ popl %esi
+ popl %ebx
+
+ adcl %eax, 20(%ecx)
+ ASSERT(nc)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+defframe(VAR_COUNTER,-20)
+defframe(VAR_JMP, -24)
+deflit(STACK_SPACE, 24)
+
+ ALIGN(16)
+L(four_or_more):
+
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+ C esi
+ C edi
+ C ebp
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+C
+C A test was done calling mpn_mul_1 here to get the benefit of its unrolled
+C loop, but this was only a tiny speedup; at 35 limbs it took 24 cycles off
+C a 5780 cycle operation, which is not surprising since the loop here is 8
+C c/l and mpn_mul_1 is 6.25 c/l.
+
+ subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
+
+ movl %edi, SAVE_EDI
+ leal 4(%edx), %edi
+
+ movl %ebx, SAVE_EBX
+ leal 4(%eax), %ebx
+
+ movl %esi, SAVE_ESI
+ xorl %esi, %esi
+
+ movl %ebp, SAVE_EBP
+
+ C eax
+ C ebx src+4
+ C ecx size
+ C edx
+ C esi
+ C edi dst+4
+ C ebp
+
+ movl (%eax), %ebp C multiplier
+ leal -1(%ecx), %ecx C size-1, and pad to a 16 byte boundary
+
+
+ ALIGN(16)
+L(mul_1):
+ C eax scratch
+ C ebx src ptr
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi dst ptr
+ C ebp multiplier
+
+ movl (%ebx), %eax
+ addl $4, %ebx
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi)
+ addl $4, %edi
+
+ loop L(mul_1)
+
+
+C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two addmuls, which are the bottom right corner of the product
+C triangle, are left to the end. These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1]. If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as mpn_addmul_1(), see that routine for some
+C comments.
+C
+C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+C
+C K6 doesn't do any branch prediction on indirect jumps, which is good
+C actually because it's a different target each time. The unrolled addmul
+C is about 3 cycles/limb faster than a simple loop, so the 6 cycle cost of
+C the indirect jump is quickly recovered.
+
+
+dnl This value is also implicitly encoded in a shift and add.
+dnl
+deflit(CODE_BYTES_PER_LIMB, 15)
+
+dnl With the unmodified &src[size] and &dst[size] pointers, the
+dnl displacements in the unrolled code fit in a byte for UNROLL_COUNT
+dnl values up to 31. Above that an offset must be added to them.
+dnl
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>31),1,
+eval((UNROLL_COUNT-31)*4),
+0))
+
+ C eax
+ C ebx &src[size]
+ C ecx
+ C edx
+ C esi carry
+ C edi &dst[size]
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, (%edi)
+
+ subl $4, %ecx
+ jz L(corner)
+
+ movl %ecx, %edx
+ifelse(OFFSET,0,,
+` subl $OFFSET, %ebx')
+
+ shll $4, %ecx
+ifelse(OFFSET,0,,
+` subl $OFFSET, %edi')
+
+ negl %ecx
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+ negl %edx
+
+
+ C The calculated jump mustn't be before the start of the available
+ C code. This is the limitation UNROLL_COUNT puts on the src operand
+ C size, but checked here using the jump address directly.
+ C
+ ASSERT(ae,`
+ movl_text_address( L(unroll_inner_start), %eax)
+ cmpl %eax, %ecx
+ ')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll_outer_top):
+ C eax
+ C ebx &src[size], constant
+ C ecx VAR_JMP
+ C edx VAR_COUNTER, limbs, negative
+ C esi high limb to store
+ C edi dst ptr, high of last addmul
+ C ebp
+
+ movl -12+OFFSET(%ebx,%edx,4), %ebp C multiplier
+ movl %edx, VAR_COUNTER
+
+ movl -8+OFFSET(%ebx,%edx,4), %eax C first limb of multiplicand
+
+ mull %ebp
+
+ testb $1, %cl
+
+ movl %edx, %esi C high carry
+ movl %ecx, %edx C jump
+
+ movl %eax, %ecx C low carry
+ leal CODE_BYTES_PER_LIMB(%edx), %edx
+
+ movl %edx, VAR_JMP
+ leal 4(%edi), %edi
+
+ C A branch-free version of this using some xors was found to be a
+ C touch slower than just a conditional jump, despite the jump
+ C switching between taken and not taken on every loop.
+
+ifelse(eval(UNROLL_COUNT%2),0,
+ jz,jnz) L(unroll_noswap)
+ movl %esi, %eax C high,low carry other way around
+
+ movl %ecx, %esi
+ movl %eax, %ecx
+L(unroll_noswap):
+
+ jmp *%edx
+
+
+ C Must be on an even address here so the low bit of the jump address
+ C will indicate which way around ecx/esi should start.
+ C
+ C An attempt was made at padding here to get the end of the unrolled
+ C code to come out on a good alignment, to save padding before
+ C L(corner). This worked, but turned out to run slower than just an
+ C ALIGN(2). The reason for this is not clear, it might be related
+ C to the different speeds on different UNROLL_COUNTs noted above.
+
+ ALIGN(2)
+
+L(unroll_inner_start):
+ C eax scratch
+ C ebx src
+ C ecx carry low
+ C edx scratch
+ C esi carry high
+ C edi dst
+ C ebp multiplier
+ C
+ C 15 code bytes each limb
+ C ecx/esi swapped on each chunk
+
+forloop(`i', UNROLL_COUNT, 1, `
+ deflit(`disp_src', eval(-i*4 + OFFSET))
+ deflit(`disp_dst', eval(disp_src - 4))
+
+ m4_assert(`disp_src>=-128 && disp_src<128')
+ m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl, disp_src,(%ebx), %eax)
+ mull %ebp
+Zdisp( addl, %esi, disp_dst,(%edi))
+ adcl %eax, %ecx
+ movl %edx, %esi
+ jadcl0( %esi)
+',`
+ dnl this one comes out last
+Zdisp( movl, disp_src,(%ebx), %eax)
+ mull %ebp
+Zdisp( addl, %ecx, disp_dst,(%edi))
+ adcl %eax, %esi
+ movl %edx, %ecx
+ jadcl0( %ecx)
+')
+')
+L(unroll_inner_end):
+
+ addl %esi, -4+OFFSET(%edi)
+
+ movl VAR_COUNTER, %edx
+ jadcl0( %ecx)
+
+ movl %ecx, m4_empty_if_zero(OFFSET)(%edi)
+ movl VAR_JMP, %ecx
+
+ incl %edx
+ jnz L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+ addl $OFFSET, %ebx
+ addl $OFFSET, %edi
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(corner):
+ C ebx &src[size]
+ C edi &dst[2*size-5]
+
+ movl -12(%ebx), %ebp
+
+ movl -8(%ebx), %eax
+ movl %eax, %ecx
+
+ mull %ebp
+
+ addl %eax, -4(%edi)
+ adcl $0, %edx
+
+ movl -4(%ebx), %eax
+ movl %edx, %esi
+ movl %eax, %ebx
+
+ mull %ebp
+
+ addl %esi, %eax
+ adcl $0, %edx
+
+ addl %eax, (%edi)
+ adcl $0, %edx
+
+ movl %edx, %esi
+ movl %ebx, %eax
+
+ mull %ecx
+
+ addl %esi, %eax
+ movl %eax, 4(%edi)
+
+ adcl $0, %edx
+
+ movl %edx, 8(%edi)
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1].
+C The loop measures about 6 cycles/iteration, though it looks like it should
+C decode in 5.
+
+L(lshift_start):
+ movl PARAM_SIZE, %ecx
+
+ movl PARAM_DST, %edi
+ subl $1, %ecx C size-1 and clear carry
+
+ movl PARAM_SRC, %ebx
+ movl %ecx, %edx
+
+ xorl %eax, %eax C ready for adcl
+
+
+ ALIGN(16)
+L(lshift):
+ C eax
+ C ebx src (for later use)
+ C ecx counter, decrementing
+ C edx size-1 (for later use)
+ C esi
+ C edi dst, incrementing
+ C ebp
+
+ rcll 4(%edi)
+ rcll 8(%edi)
+ leal 8(%edi), %edi
+ loop L(lshift)
+
+
+ adcl %eax, %eax
+
+ movl %eax, 4(%edi) C dst most significant limb
+ movl (%ebx), %eax C src[0]
+
+ leal 4(%ebx,%edx,4), %ebx C &src[size]
+ subl %edx, %ecx C -(size-1)
+
+
+C -----------------------------------------------------------------------------
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+
+ mull %eax
+
+ movl %eax, (%edi,%ecx,8) C dst[0]
+
+
+ ALIGN(16)
+L(diag):
+ C eax scratch
+ C ebx &src[size]
+ C ecx counter, negative
+ C edx carry
+ C esi scratch
+ C edi dst[2*size-2]
+ C ebp
+
+ movl (%ebx,%ecx,4), %eax
+ movl %edx, %esi
+
+ mull %eax
+
+ addl %esi, 4(%edi,%ecx,8)
+ adcl %eax, 8(%edi,%ecx,8)
+ adcl $0, %edx
+
+ incl %ecx
+ jnz L(diag)
+
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ addl %edx, 4(%edi) C dst most significant limb
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ addl (%esp), %ecx
+ addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
+ addl %edx, %ecx
+ ret
+')
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/README b/rts/gmp/mpn/x86/k7/README
new file mode 100644
index 0000000000..c34315c401
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/README
@@ -0,0 +1,145 @@
+
+ AMD K7 MPN SUBROUTINES
+
+
+This directory contains code optimized for the AMD Athlon CPU.
+
+The mmx subdirectory has routines using MMX instructions. All Athlons have
+MMX, the separate directory is just so that configure can omit it if the
+assembler doesn't support MMX.
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache.
+
+ cycles/limb
+ mpn_add/sub_n 1.6
+
+ mpn_copyi 0.75 or 1.0 \ varying with data alignment
+ mpn_copyd 0.75 or 1.0 /
+
+ mpn_divrem_1 17.0 integer part, 15.0 fractional part
+ mpn_mod_1 17.0
+ mpn_divexact_by3 8.0
+
+ mpn_l/rshift 1.2
+
+ mpn_mul_1 3.4
+ mpn_addmul/submul_1 3.9
+
+ mpn_mul_basecase 4.42 cycles/crossproduct (approx)
+
+ mpn_popcount 5.0
+ mpn_hamdist 6.0
+
+Prefetching of sources hasn't yet been tried.
+
+
+
+NOTES
+
+cmov, MMX, 3DNow and some extensions to MMX and 3DNow are available.
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+
+Floating point multiplications can be done in parallel with integer
+multiplications, but there doesn't seem to be any way to make use of this.
+
+Unsigned "mul"s can be issued every 3 cycles. This suggests 3 is a limit on
+the speed of the multiplication routines. The documentation shows mul
+executing in IEU0 (or maybe in IEU0 and IEU1 together), so it might be that,
+to get near 3 cycles code has to be arranged so that nothing else is issued
+to IEU0. A busy IEU0 could explain why some code takes 4 cycles and other
+apparently equivalent code takes 5.
+
+
+
+OPTIMIZATIONS
+
+Unrolled loops are used to reduce looping overhead. The unrolling is
+configurable up to 32 limbs/loop for most routines and up to 64 for some.
+The K7 has 64k L1 code cache so quite big unrolling is allowable.
+
+Computed jumps into the unrolling are used to handle sizes not a multiple of
+the unrolling. An attractive feature of this is that times increase
+smoothly with operand size, but it may be that some routines should just
+have simple loops to finish up, especially when PIC adds between 2 and 16
+cycles to get %eip.
+
+Position independent code is implemented using a call to get %eip for the
+computed jumps and a ret is always done, rather than an addl $4,%esp or a
+popl, so the CPU return address branch prediction stack stays synchronised
+with the actual stack in memory.
+
+Branch prediction, in absence of any history, will guess forward jumps are
+not taken and backward jumps are taken. Where possible it's arranged that
+the less likely or less important case is under a taken forward jump.
+
+
+
+CODING
+
+Instructions in general code have been shown grouped if they can execute
+together, which means up to three direct-path instructions which have no
+successive dependencies. K7 always decodes three and has out-of-order
+execution, but the groupings show what slots might be available and what
+dependency chains exist.
+
+When there's vector-path instructions an effort is made to get triplets of
+direct-path instructions in between them, even if there's dependencies,
+since this maximizes decoding throughput and might save a cycle or two if
+decoding is the limiting factor.
+
+
+
+INSTRUCTIONS
+
+adcl direct
+divl 39 cycles back-to-back
+lodsl,etc vector
+loop 1 cycle vector (decl/jnz opens up one decode slot)
+movd reg vector
+movd mem direct
+mull issue every 3 cycles, latency 4 cycles low word, 6 cycles high word
+popl vector (use movl for more than one pop)
+pushl direct, will pair with a load
+shrdl %cl vector, 3 cycles, seems to be 3 decode too
+xorl r,r false read dependency recognised
+
+
+
+REFERENCES
+
+"AMD Athlon Processor X86 Code Optimization Guide", AMD publication number
+22007, revision E, November 1999. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22007.pdf
+
+"3DNow Technology Manual", AMD publication number 21928F/0-August 1999.
+This describes the femms and prefetch instructions. Available on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21928.pdf
+
+"AMD Extensions to the 3DNow and MMX Instruction Sets Manual", AMD
+publication number 22466, revision B, August 1999. This describes
+instructions added in the Athlon processor, such as pswapd and the extra
+prefetch forms. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22466.pdf
+
+"3DNow Instruction Porting Guide", AMD publication number 22621, revision B,
+August 1999. This has some notes on general Athlon optimizations as well as
+3DNow. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22621.pdf
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/k7/aors_n.asm b/rts/gmp/mpn/x86/k7/aors_n.asm
new file mode 100644
index 0000000000..85fa9d3036
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/aors_n.asm
@@ -0,0 +1,250 @@
+dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract.
+dnl
+dnl K7: 1.64 cycles/limb (at 16 limb/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 8 1.9
+dnl 16 1.64
+dnl 32 1.7
+dnl 64 2.0
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_add_n', `
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+ define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+ define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size. The return value is the carry bit from the top of the result (1
+C or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation. Note values other than 1 or 0 here will lead to garbage
+C results.
+C
+C This code runs at 1.64 cycles/limb, which is probably the best possible
+C with plain integer operations. Each limb is 2 loads and 1 store, and in
+C one cycle the K7 can do two loads, or a load and a store, leading to 1.5
+C c/l.
+
+dnl Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 8)
+',`
+deflit(UNROLL_THRESHOLD, 8)
+')
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBP, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+defframe(SAVE_EDI, -16)
+deflit(STACK_SPACE, 16)
+
+ .text
+ ALIGN(32)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function_nc)
+ movl PARAM_CARRY, %eax
+ jmp LF(M4_function_n,start)
+EPILOGUE()
+
+PROLOGUE(M4_function_n)
+
+ xorl %eax, %eax C carry
+L(start):
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %edi, SAVE_EDI
+ movl %ebx, SAVE_EBX
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_SRC2, %edx
+ movl PARAM_SRC1, %ebx
+ jae L(unroll)
+
+ movl PARAM_DST, %edi
+ leal (%ebx,%ecx,4), %ebx
+ leal (%edx,%ecx,4), %edx
+
+ leal (%edi,%ecx,4), %edi
+ negl %ecx
+ shrl %eax
+
+ C This loop in in a single 16 byte code block already, so no
+ C alignment necessary.
+L(simple):
+ C eax scratch
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+
+ movl (%ebx,%ecx,4), %eax
+ M4_inst (%edx,%ecx,4), %eax
+ movl %eax, (%edi,%ecx,4)
+ incl %ecx
+ jnz L(simple)
+
+ movl $0, %eax
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBX, %ebx
+ setc %al
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ C This is at 0x55, close enough to aligned.
+L(unroll):
+deflit(`FRAME',STACK_SPACE)
+ movl %ebp, SAVE_EBP
+ andl $-2, %ecx C size low bit masked out
+ andl $1, PARAM_SIZE C size low bit kept
+
+ movl %ecx, %edi
+ decl %ecx
+ movl PARAM_DST, %ebp
+
+ shrl $UNROLL_LOG2, %ecx
+ negl %edi
+ movl %esi, SAVE_ESI
+
+ andl $UNROLL_MASK, %edi
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%edi,%edi,8), %esi C 9 bytes per
+')
+ negl %edi
+ shrl %eax
+
+ leal ifelse(UNROLL_BYTES,256,128) (%ebx,%edi,4), %ebx
+ leal ifelse(UNROLL_BYTES,256,128) (%edx,%edi,4), %edx
+ leal ifelse(UNROLL_BYTES,256,128) (%ebp,%edi,4), %edi
+
+ jmp *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%edi,%edi,8), %esi
+ addl $L(entry)-L(here), %esi
+ addl (%esp), %esi
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(top):
+ C eax zero
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi scratch (was computed jump)
+ C edi dst
+ C ebp scratch
+
+ leal UNROLL_BYTES(%edx), %edx
+
+L(entry):
+deflit(CHUNK_COUNT, 2)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%ebx), %esi)
+ movl disp1(%ebx), %ebp
+Zdisp( M4_inst,disp0,(%edx), %esi)
+Zdisp( movl, %esi, disp0,(%edi))
+ M4_inst disp1(%edx), %ebp
+ movl %ebp, disp1(%edi)
+')
+
+ decl %ecx
+ leal UNROLL_BYTES(%ebx), %ebx
+ leal UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+ mov PARAM_SIZE, %esi
+ movl SAVE_EBP, %ebp
+ movl $0, %eax
+
+ decl %esi
+ js L(even)
+
+ movl (%ebx), %ecx
+ M4_inst UNROLL_BYTES(%edx), %ecx
+ movl %ecx, (%edi)
+L(even):
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+ setc %al
+
+ movl SAVE_ESI, %esi
+ addl $STACK_SPACE, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/aorsmul_1.asm b/rts/gmp/mpn/x86/k7/aorsmul_1.asm
new file mode 100644
index 0000000000..9f9c3daaf4
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/aorsmul_1.asm
@@ -0,0 +1,364 @@
+dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl
+dnl K7: 3.9 cycles/limb.
+dnl
+dnl Future: It should be possible to avoid the separate mul after the
+dnl unrolled loop by moving the movl/adcl to the top.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 4 4.42
+dnl 8 4.16
+dnl 16 3.9
+dnl 32 3.9
+dnl 64 3.87
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1',`
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+ define(M4_function_1c, mpn_addmul_1c)
+ define(M4_description, add it to)
+ define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1',`
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+ define(M4_function_1c, mpn_submul_1c)
+ define(M4_description, subtract it from)
+ define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 9)
+',`
+deflit(UNROLL_THRESHOLD, 6)
+')
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(SAVE_SIZE, 16)
+
+ .text
+ ALIGN(32)
+PROLOGUE(M4_function_1)
+ movl PARAM_SIZE, %edx
+ movl PARAM_SRC, %eax
+ xorl %ecx, %ecx
+
+ decl %edx
+ jnz LF(M4_function_1c,start_1)
+
+ movl (%eax), %eax
+ movl PARAM_DST, %ecx
+
+ mull PARAM_MULTIPLIER
+
+ M4_inst %eax, (%ecx)
+ adcl $0, %edx
+ movl %edx, %eax
+
+ ret
+EPILOGUE()
+
+ ALIGN(16)
+PROLOGUE(M4_function_1c)
+ movl PARAM_SIZE, %edx
+ movl PARAM_SRC, %eax
+
+ decl %edx
+ jnz L(more_than_one_limb)
+
+ movl (%eax), %eax
+ movl PARAM_DST, %ecx
+
+ mull PARAM_MULTIPLIER
+
+ addl PARAM_CARRY, %eax
+
+ adcl $0, %edx
+ M4_inst %eax, (%ecx)
+
+ adcl $0, %edx
+ movl %edx, %eax
+
+ ret
+
+
+ C offset 0x44 so close enough to aligned
+L(more_than_one_limb):
+ movl PARAM_CARRY, %ecx
+L(start_1):
+ C eax src
+ C ecx initial carry
+ C edx size-1
+ subl $SAVE_SIZE, %esp
+deflit(`FRAME',16)
+
+ movl %ebx, SAVE_EBX
+ movl %esi, SAVE_ESI
+ movl %edx, %ebx C size-1
+
+ movl PARAM_SRC, %esi
+ movl %ebp, SAVE_EBP
+ cmpl $UNROLL_THRESHOLD, %edx
+
+ movl PARAM_MULTIPLIER, %ebp
+ movl %edi, SAVE_EDI
+
+ movl (%esi), %eax C src low limb
+ movl PARAM_DST, %edi
+ ja L(unroll)
+
+
+ C simple loop
+
+ leal 4(%esi,%ebx,4), %esi C point one limb past last
+ leal (%edi,%ebx,4), %edi C point at last limb
+ negl %ebx
+
+ C The movl to load the next source limb is done well ahead of the
+ C mul. This is necessary for full speed, and leads to one limb
+ C handled separately at the end.
+
+L(simple):
+ C eax src limb
+ C ebx loop counter
+ C ecx carry limb
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ mull %ebp
+
+ addl %eax, %ecx
+ adcl $0, %edx
+
+ M4_inst %ecx, (%edi,%ebx,4)
+ movl (%esi,%ebx,4), %eax
+ adcl $0, %edx
+
+ incl %ebx
+ movl %edx, %ecx
+ jnz L(simple)
+
+
+ mull %ebp
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+
+ addl %eax, %ecx
+ adcl $0, %edx
+
+ M4_inst %ecx, (%edi)
+ adcl $0, %edx
+ movl SAVE_EDI, %edi
+
+ addl $SAVE_SIZE, %esp
+ movl %edx, %eax
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax src low limb
+ C ebx size-1
+ C ecx carry
+ C edx size-1
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+dnl overlapping with parameters no longer needed
+define(VAR_COUNTER,`PARAM_SIZE')
+define(VAR_JUMP, `PARAM_MULTIPLIER')
+
+ subl $2, %ebx C (size-2)-1
+ decl %edx C size-2
+
+ shrl $UNROLL_LOG2, %ebx
+ negl %edx
+
+ movl %ebx, VAR_COUNTER
+ andl $UNROLL_MASK, %edx
+
+ movl %edx, %ebx
+ shll $4, %edx
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%edx,%ebx,1), %edx
+')
+ negl %ebx
+ movl %edx, VAR_JUMP
+
+ mull %ebp
+
+ addl %eax, %ecx C initial carry, becomes low carry
+ adcl $0, %edx
+ testb $1, %bl
+
+ movl 4(%esi), %eax C src second limb
+ leal ifelse(UNROLL_BYTES,256,128+) 8(%esi,%ebx,4), %esi
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%ebx,4), %edi
+
+ movl %edx, %ebx C high carry
+ cmovnz( %ecx, %ebx) C high,low carry other way around
+ cmovnz( %edx, %ecx)
+
+ jmp *VAR_JUMP
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%edx,%ebx,1), %edx
+ addl $L(entry)-L(here), %edx
+ addl (%esp), %edx
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+C This code uses a "two carry limbs" scheme. At the top of the loop the
+C carries are ebx=lo, ecx=hi, then they swap for each limb processed. For
+C the computed jump an odd size means they start one way around, an even
+C size the other. Either way one limb is handled separately at the start of
+C the loop.
+C
+C The positioning of the movl to load the next source limb is important.
+C Moving it after the adcl with a view to avoiding a separate mul at the end
+C of the loop slows the code down.
+
+ ALIGN(32)
+L(top):
+ C eax src limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src+8
+ C edi dst
+ C ebp multiplier
+ C
+ C VAR_COUNTER loop counter
+ C
+ C 17 bytes each limb
+
+L(entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+ mull %ebp
+
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+ movl $0, %ecx
+
+ adcl %eax, %ebx
+
+Zdisp( movl, disp0,(%esi), %eax)
+ adcl %edx, %ecx
+
+
+ mull %ebp
+
+ M4_inst %ebx, disp1(%edi)
+ movl $0, %ebx
+
+ adcl %eax, %ecx
+
+ movl disp1(%esi), %eax
+ adcl %edx, %ebx
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%esi), %esi
+ leal UNROLL_BYTES(%edi), %edi
+
+ jns L(top)
+
+
+ C eax src limb
+ C ebx carry high
+ C ecx carry low
+ C edx
+ C esi
+ C edi dst (points at second last limb)
+ C ebp multiplier
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 4))
+
+ mull %ebp
+
+ M4_inst %ecx, disp0(%edi)
+ movl SAVE_EBP, %ebp
+
+ adcl %ebx, %eax
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ adcl $0, %edx
+ M4_inst %eax, disp1(%edi)
+ movl SAVE_EDI, %edi
+
+ adcl $0, %edx
+ addl $SAVE_SIZE, %esp
+
+ movl %edx, %eax
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/diveby3.asm b/rts/gmp/mpn/x86/k7/diveby3.asm
new file mode 100644
index 0000000000..57684958a5
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/diveby3.asm
@@ -0,0 +1,131 @@
+dnl AMD K7 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl K7: 8.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+dnl ceil(b/3) and floor(b*2/3) where b=2^32
+deflit(ONE_THIRD_CEIL, 0x55555556)
+deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SRC, %ecx
+ pushl %ebx defframe_pushl(SAVE_EBX)
+
+ movl PARAM_CARRY, %ebx
+ pushl %ebp defframe_pushl(SAVE_EBP)
+
+ movl PARAM_SIZE, %ebp
+ pushl %edi defframe_pushl(SAVE_EDI)
+
+ movl (%ecx), %eax C src low limb
+ pushl %esi defframe_pushl(SAVE_ESI)
+
+ movl PARAM_DST, %edi
+ movl $TWO_THIRDS_FLOOR, %esi
+ leal -4(%ecx,%ebp,4), %ecx C &src[size-1]
+
+ subl %ebx, %eax
+
+ setc %bl
+ decl %ebp
+ jz L(last)
+
+ leal (%edi,%ebp,4), %edi C &dst[size-1]
+ negl %ebp
+
+
+ ALIGN(16)
+L(top):
+ C eax src limb, carry subtracted
+ C ebx carry limb (0 or 1)
+ C ecx &src[size-1]
+ C edx scratch
+ C esi TWO_THIRDS_FLOOR
+ C edi &dst[size-1]
+ C ebp counter, limbs, negative
+
+ imull $INVERSE_3, %eax, %edx
+
+ movl 4(%ecx,%ebp,4), %eax C next src limb
+ cmpl $ONE_THIRD_CEIL, %edx
+
+ sbbl $-1, %ebx C +1 if result>=ceil(b/3)
+ cmpl %edx, %esi
+
+ sbbl %ebx, %eax C and further 1 if result>=ceil(b*2/3)
+ movl %edx, (%edi,%ebp,4)
+ incl %ebp
+
+ setc %bl C new carry
+ jnz L(top)
+
+
+
+L(last):
+ C eax src limb, carry subtracted
+ C ebx carry limb (0 or 1)
+ C ecx &src[size-1]
+ C edx scratch
+ C esi multiplier
+ C edi &dst[size-1]
+ C ebp
+
+ imull $INVERSE_3, %eax
+
+ cmpl $ONE_THIRD_CEIL, %eax
+ movl %eax, (%edi)
+ movl SAVE_EBP, %ebp
+
+ sbbl $-1, %ebx C +1 if eax>=ceil(b/3)
+ cmpl %eax, %esi
+ movl $0, %eax
+
+ adcl %ebx, %eax C further +1 if eax>=ceil(b*2/3)
+ movl SAVE_EDI, %edi
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/gmp-mparam.h b/rts/gmp/mpn/x86/k7/gmp-mparam.h
new file mode 100644
index 0000000000..c3bba0afc4
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/gmp-mparam.h
@@ -0,0 +1,100 @@
+/* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+/* the low limb is ready after 4 cycles, but normally it's the high limb
+ which is of interest, and that comes out after 6 cycles */
+#ifndef UMUL_TIME
+#define UMUL_TIME 6 /* cycles */
+#endif
+
+/* AMD doco says 40, but it measures 39 back-to-back */
+#ifndef UDIV_TIME
+#define UDIV_TIME 39 /* cycles */
+#endif
+
+/* using bsf */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 7 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 26
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 177
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 52
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 173
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 76
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 114
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 34
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 5
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 54
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 720, 1440, 2944, 7680, 18432, 57344, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 736
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 6912
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 784, 1696, 3200, 7680, 18432, 57344, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 800
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 8448
+#endif
diff --git a/rts/gmp/mpn/x86/k7/mmx/copyd.asm b/rts/gmp/mpn/x86/k7/mmx/copyd.asm
new file mode 100644
index 0000000000..33214daa1f
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/copyd.asm
@@ -0,0 +1,136 @@
+dnl AMD K7 mpn_copyd -- copy limb vector, decrementing.
+dnl
+dnl alignment dst/src, A=0mod8 N=4mod8
+dnl A/A A/N N/A N/N
+dnl K7 0.75 1.0 1.0 0.75
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The various comments in mpn/x86/k7/copyi.asm apply here too.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl parameter space reused
+define(SAVE_EBX,`PARAM_SIZE')
+define(SAVE_ESI,`PARAM_SRC')
+
+dnl minimum 5 since the unrolled code can't handle less than 5
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_copyd)
+
+ movl PARAM_SIZE, %ecx
+ movl %ebx, SAVE_EBX
+
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %edx
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ jae L(unroll)
+
+ orl %ecx, %ecx
+ jz L(simple_done)
+
+L(simple):
+ C eax src
+ C ebx scratch
+ C ecx counter
+ C edx dst
+ C
+ C this loop is 2 cycles/limb
+
+ movl -4(%eax,%ecx,4), %ebx
+ movl %ebx, -4(%edx,%ecx,4)
+ decl %ecx
+ jnz L(simple)
+
+L(simple_done):
+ movl SAVE_EBX, %ebx
+ ret
+
+
+L(unroll):
+ movl %esi, SAVE_ESI
+ leal (%eax,%ecx,4), %ebx
+ leal (%edx,%ecx,4), %esi
+
+ andl %esi, %ebx
+ movl SAVE_ESI, %esi
+ subl $4, %ecx C size-4
+
+ testl $4, %ebx C testl to pad code closer to 16 bytes for L(top)
+ jz L(aligned)
+
+ C both src and dst unaligned, process one limb to align them
+ movl 12(%eax,%ecx,4), %ebx
+ movl %ebx, 12(%edx,%ecx,4)
+ decl %ecx
+L(aligned):
+
+
+ ALIGN(16)
+L(top):
+ C eax src
+ C ebx
+ C ecx counter, limbs
+ C edx dst
+
+ movq 8(%eax,%ecx,4), %mm0
+ movq (%eax,%ecx,4), %mm1
+ subl $4, %ecx
+ movq %mm0, 16+8(%edx,%ecx,4)
+ movq %mm1, 16(%edx,%ecx,4)
+ jns L(top)
+
+
+ C now %ecx is -4 to -1 representing respectively 0 to 3 limbs remaining
+
+ testb $2, %cl
+ jz L(finish_not_two)
+
+ movq 8(%eax,%ecx,4), %mm0
+ movq %mm0, 8(%edx,%ecx,4)
+L(finish_not_two):
+
+ testb $1, %cl
+ jz L(done)
+
+ movl (%eax), %ebx
+ movl %ebx, (%edx)
+
+L(done):
+ movl SAVE_EBX, %ebx
+ emms
+ ret
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/copyi.asm b/rts/gmp/mpn/x86/k7/mmx/copyi.asm
new file mode 100644
index 0000000000..b234a1628c
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/copyi.asm
@@ -0,0 +1,147 @@
+dnl AMD K7 mpn_copyi -- copy limb vector, incrementing.
+dnl
+dnl alignment dst/src, A=0mod8 N=4mod8
+dnl A/A A/N N/A N/N
+dnl K7 0.75 1.0 1.0 0.75
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size.
+C
+C This code at 0.75 or 1.0 c/l is always faster than a plain rep movsl at
+C 1.33 c/l.
+C
+C The K7 can do two loads, or two stores, or a load and a store, in one
+C cycle, so if those are 64-bit operations then 0.5 c/l should be possible,
+C however nothing under 0.7 c/l is known.
+C
+C If both source and destination are unaligned then one limb is processed at
+C the start to make them aligned and so get 0.75 c/l, whereas if they'd been
+C used unaligned it would be 1.5 c/l.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl parameter space reused
+define(SAVE_EBX,`PARAM_SIZE')
+
+dnl minimum 5 since the unrolled code can't handle less than 5
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_copyi)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl %ebx, SAVE_EBX
+
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %edx
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ jae L(unroll)
+
+ orl %ecx, %ecx
+ jz L(simple_done)
+
+L(simple):
+ C eax src, incrementing
+ C ebx scratch
+ C ecx counter
+ C edx dst, incrementing
+ C
+ C this loop is 2 cycles/limb
+
+ movl (%eax), %ebx
+ movl %ebx, (%edx)
+ decl %ecx
+ leal 4(%eax), %eax
+ leal 4(%edx), %edx
+ jnz L(simple)
+
+L(simple_done):
+ movl SAVE_EBX, %ebx
+ ret
+
+
+L(unroll):
+ movl %eax, %ebx
+ leal -12(%eax,%ecx,4), %eax C src end - 12
+ subl $3, %ecx C size-3
+
+ andl %edx, %ebx
+ leal (%edx,%ecx,4), %edx C dst end - 12
+ negl %ecx
+
+ testl $4, %ebx C testl to pad code closer to 16 bytes for L(top)
+ jz L(aligned)
+
+ C both src and dst unaligned, process one limb to align them
+ movl (%eax,%ecx,4), %ebx
+ movl %ebx, (%edx,%ecx,4)
+ incl %ecx
+L(aligned):
+
+
+ ALIGN(16)
+L(top):
+ C eax src end - 12
+ C ebx
+ C ecx counter, negative, limbs
+ C edx dst end - 12
+
+ movq (%eax,%ecx,4), %mm0
+ movq 8(%eax,%ecx,4), %mm1
+ addl $4, %ecx
+ movq %mm0, -16(%edx,%ecx,4)
+ movq %mm1, -16+8(%edx,%ecx,4)
+ ja L(top) C jump no carry and not zero
+
+
+ C now %ecx is 0 to 3 representing respectively 3 to 0 limbs remaining
+
+ testb $2, %cl
+ jnz L(finish_not_two)
+
+ movq (%eax,%ecx,4), %mm0
+ movq %mm0, (%edx,%ecx,4)
+L(finish_not_two):
+
+ testb $1, %cl
+ jnz L(done)
+
+ movl 8(%eax), %ebx
+ movl %ebx, 8(%edx)
+
+L(done):
+ movl SAVE_EBX, %ebx
+ emms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm b/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm
new file mode 100644
index 0000000000..483ad6a9a1
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm
@@ -0,0 +1,718 @@
+dnl AMD K7 mpn_divrem_1 -- mpn by limb division.
+dnl
+dnl K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor, mp_limb_t carry);
+C
+C The method and nomenclature follow part 8 of "Division by Invariant
+C Integers using Multiplication" by Granlund and Montgomery, reference in
+C gmp.texi.
+C
+C The "and"s shown in the paper are done here with "cmov"s. "m" is written
+C for m', and "d" for d_norm, which won't cause any confusion since it's
+C only the normalized divisor that's of any use in the code. "b" is written
+C for 2^N, the size of a limb, N being 32 here.
+C
+C mpn_divrem_1 avoids one division if the src high limb is less than the
+C divisor. mpn_divrem_1c doesn't check for a zero carry, since in normal
+C circumstances that will be a very rare event.
+C
+C There's a small bias towards expecting xsize==0, by having code for
+C xsize==0 in a straight line and xsize!=0 under forward jumps.
+
+
+dnl MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl inverse method is used, rather than plain "divl"s. Minimum value 1.
+dnl
+dnl The inverse takes about 50 cycles to calculate, but after that the
+dnl multiply is 17 c/l versus division at 42 c/l.
+dnl
+dnl At 3 limbs the mul is a touch faster than div on the integer part, and
+dnl even more so on the fractional part.
+
+deflit(MUL_THRESHOLD, 3)
+
+
+defframe(PARAM_CARRY, 24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC, 12)
+defframe(PARAM_XSIZE, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC, -28)
+defframe(VAR_DST, -32)
+defframe(VAR_DST_STOP,-36)
+
+deflit(STACK_SPACE, 36)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ leal -4(%edi,%ebx,4), %edi
+ jmp LF(mpn_divrem_1,start_1c)
+
+EPILOGUE()
+
+
+ C offset 0x31, close enough to aligned
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ orl %ecx, %ecx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+ leal -4(%edi,%ebx,4), %edi C &dst[xsize-1]
+
+ jz L(no_skip_div)
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C one less div if high<divisor
+ jnb L(no_skip_div)
+
+ movl $0, (%edi,%ecx,4) C dst high limb
+ decl %ecx C size-1
+ movl %eax, %edx C src high limb as initial carry
+L(no_skip_div):
+
+
+L(start_1c):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ leal (%ebx,%ecx), %eax C size+xsize
+ cmpl $MUL_THRESHOLD, %eax
+ jae L(mul_by_inverse)
+
+
+C With MUL_THRESHOLD set to 3, the simple loops here only do 0 to 2 limbs.
+C It'd be possible to write them out without the looping, but no speedup
+C would be expected.
+C
+C Using PARAM_DIVISOR instead of %ebp measures 1 cycle/loop faster on the
+C integer part, but curiously not on the fractional part, where %ebp is a
+C (fixed) couple of cycles faster.
+
+ orl %ecx, %ecx
+ jz L(divide_no_integer)
+
+L(divide_integer):
+ C eax scratch (quotient)
+ C ebx xsize
+ C ecx counter
+ C edx scratch (remainder)
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl PARAM_DIVISOR
+
+ movl %eax, (%edi,%ecx,4)
+ decl %ecx
+ jnz L(divide_integer)
+
+
+L(divide_no_integer):
+ movl PARAM_DST, %edi
+ orl %ebx, %ebx
+ jnz L(divide_fraction)
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl SAVE_EDI, %edi
+ movl %edx, %eax
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+L(divide_fraction):
+ C eax scratch (quotient)
+ C ebx counter
+ C ecx
+ C edx scratch (remainder)
+ C esi
+ C edi dst
+ C ebp divisor
+
+ movl $0, %eax
+
+ divl %ebp
+
+ movl %eax, -4(%edi,%ebx,4)
+ decl %ebx
+ jnz L(divide_fraction)
+
+ jmp L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ bsrl %ebp, %eax C 31-l
+
+ leal 12(%edi), %ebx
+ leal 4(%edi,%ecx,4), %edi C &dst[xsize+size]
+
+ movl %edi, VAR_DST
+ movl %ebx, VAR_DST_STOP
+
+ movl %ecx, %ebx C size
+ movl $31, %ecx
+
+ movl %edx, %edi C carry
+ movl $-1, %edx
+
+ C
+
+ xorl %eax, %ecx C l
+ incl %eax C 32-l
+
+ shll %cl, %ebp C d normalized
+ movl %ecx, VAR_NORM
+
+ movd %eax, %mm7
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 giving edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ orl %ebx, %ebx C size
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ jz L(start_zero)
+ movl %eax, VAR_SRC
+ cmpl $1, %ebx
+
+ movl 8(%eax), %esi C src high limb
+ jz L(start_one)
+
+L(start_two_or_more):
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ cmpl $2, %ebx
+ je L(integer_two_left)
+ jmp L(integer_top)
+
+
+L(start_one):
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shll %cl, %esi C n10 = high << l
+ movl %eax, VAR_SRC
+ jmp L(integer_one_left)
+
+
+L(start_zero):
+ shll %cl, %edi C n2 = carry << l
+ movl $0, %esi C n10 = 0
+
+ C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then
+ C must have xsize!=0
+ jmp L(fraction_some)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The multiply by inverse loop is 17 cycles, and relies on some out-of-order
+C execution. The instruction scheduling is important, with various
+C apparently equivalent forms running 1 to 5 cycles slower.
+C
+C A lower bound for the time would seem to be 16 cycles, based on the
+C following successive dependencies.
+C
+C cycles
+C n2+n1 1
+C mul 6
+C q1+1 1
+C mul 6
+C sub 1
+C addback 1
+C ---
+C 16
+C
+C This chain is what the loop has already, but 16 cycles isn't achieved.
+C K7 has enough decode, and probably enough execute (depending maybe on what
+C a mul actually consumes), but nothing running under 17 has been found.
+C
+C In theory n2+n1 could be done in the sub and addback stages (by
+C calculating both n2 and n2+n1 there), but lack of registers makes this an
+C unlikely proposition.
+C
+C The jz in the loop keeps the q1+1 stage to 1 cycle. Handling an overflow
+C from q1+1 with an "sbbl $0, %ebx" would add a cycle to the dependent
+C chain, and nothing better than 18 cycles has been found when using it.
+C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will
+C be an extremely rare event.
+C
+C Branch mispredictions will hit random occurrances of q1==0xFFFFFFFF, but
+C if some special data is coming out with this always, the q1_ff special
+C case actually runs at 15 c/l. 0x2FFF...FFFD divided by 3 is a good way to
+C induce the q1_ff case, for speed measurements or testing. Note that
+C 0xFFF...FFF divided by 1 or 2 doesn't induce it.
+C
+C The instruction groupings and empty comments show the cycles for a naive
+C in-order view of the code (conveniently ignoring the load latency on
+C VAR_INVERSE). This shows some of where the time is going, but is nonsense
+C to the extent that out-of-order execution rearranges it. In this case
+C there's 19 cycles shown, but it executes at 17.
+
+ ALIGN(16)
+L(integer_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl VAR_SRC, %ecx
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next limb and the one below it
+ subl $4, %ecx
+
+ movl %ecx, VAR_SRC
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+ movl VAR_DST, %ecx
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+
+ leal -4(%ecx), %ecx
+
+ C
+
+ subl %eax, %esi
+ movl VAR_DST_STOP, %eax
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+ cmpl %eax, %ecx
+
+ movl %ebx, (%ecx)
+ movl %ecx, VAR_DST
+ jne L(integer_top)
+
+
+L(integer_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C
+C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
+C q1_ff special case. This make the code a bit smaller and simpler, and
+C costs only 1 cycle (each).
+
+L(integer_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl PARAM_SRC, %ecx
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd (%ecx), %mm0 C src low limb
+
+ movl VAR_DST_STOP, %ecx
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+
+ movl %ebx, -4(%ecx)
+
+
+C -----------------------------------------------------------------------------
+L(integer_one_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx dst
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ movl VAR_DST_STOP, %ecx
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx C q1 if q1+1 overflowed
+
+ mull %ebx
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+
+ movl %ebx, -8(%ecx)
+ subl $8, %ecx
+
+
+
+L(integer_none):
+ cmpl $0, PARAM_XSIZE
+ jne L(fraction_some)
+
+ movl %edi, %eax
+L(fraction_done):
+ movl VAR_NORM, %ecx
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EDI, %edi
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBX, %ebx
+ addl $STACK_SPACE, %esp
+
+ shrl %cl, %eax
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx
+ C edx
+ C esi n10
+ C edi n2
+ C ebp divisor
+
+ movl VAR_DST, %ecx
+ movl VAR_DST_STOP, %edx
+ subl $4, %ecx
+
+ psrlq %mm7, %mm0
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ movl %ecx, VAR_DST
+
+ movd %mm0, %esi C next n10
+
+ movl $-1, (%ecx)
+ cmpl %ecx, %edx
+ jne L(integer_top)
+
+ jmp L(integer_loop_done)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C Being the fractional part, the "source" limbs are all zero, meaning
+C n10=0, n1=0, and hence nadj=0, leading to many instructions eliminated.
+C
+C The loop runs at 15 cycles. The dependent chain is the same as the
+C general case above, but without the n2+n1 stage (due to n1==0), so 15
+C would seem to be the lower bound.
+C
+C A not entirely obvious simplification is that q1+1 never overflows a limb,
+C and so there's no need for the sbbl $0 or jz q1_ff from the general case.
+C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always.
+C rnd() means rounding down to a multiple of d.
+C
+C m*n2 + b*n2 <= m*(d-1) + b*(d-1)
+C = m*d + b*d - m - b
+C = floor((b(b-d)-1)/d)*d + b*d - m - b
+C = rnd(b(b-d)-1) + b*d - m - b
+C = rnd(b(b-d)-1 + b*d) - m - b
+C = rnd(b*b-1) - m - b
+C <= (b-2)*b
+C
+C Unchanged from the general case is that the final quotient limb q can be
+C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from
+C equation 8.4 of the paper which simplifies as follows when n1==0 and
+C n0==0.
+C
+C n-q1*d = (n2*k+q0*d)/b <= d + (d*d-2d)/b
+C
+C As before, the instruction groupings and empty comments show a naive
+C in-order view of the code, which is made a nonsense by out of order
+C execution. There's 17 cycles shown, but it executes at 15.
+C
+C Rotating the store q and remainder->n2 instructions up to the top of the
+C loop gets the run time down from 16 to 15.
+
+ ALIGN(16)
+L(fraction_some):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi carry
+ C ebp divisor
+
+ movl PARAM_DST, %esi
+ movl VAR_DST_STOP, %ecx
+ movl %edi, %eax
+
+ subl $8, %ecx
+
+ jmp L(fraction_entry)
+
+
+ ALIGN(16)
+L(fraction_top):
+ C eax n2 carry, then scratch
+ C ebx scratch (nadj, q1)
+ C ecx dst, decrementing
+ C edx scratch
+ C esi dst stop point
+ C edi (will be n2)
+ C ebp divisor
+
+ movl %ebx, (%ecx) C previous q
+ movl %eax, %edi C remainder->n2
+
+L(fraction_entry):
+ mull VAR_INVERSE C m*n2
+
+ movl %ebp, %eax C d
+ subl $4, %ecx C dst
+ leal 1(%edi), %ebx
+
+ C
+
+ C
+
+ C
+
+ C
+
+ addl %edx, %ebx C 1 + high(n2<<32 + m*n2) = q1+1
+
+ mull %ebx C (q1+1)*d
+
+ C
+
+ C
+
+ C
+
+ negl %eax C low of n - (q1+1)*d
+
+ C
+
+ sbbl %edx, %edi C high of n - (q1+1)*d, caring only about carry
+ leal (%ebp,%eax), %edx
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+ cmpl %esi, %ecx
+
+ jne L(fraction_top)
+
+
+ movl %ebx, (%ecx)
+ jmp L(fraction_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/lshift.asm b/rts/gmp/mpn/x86/k7/mmx/lshift.asm
new file mode 100644
index 0000000000..4d17c881ec
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/lshift.asm
@@ -0,0 +1,472 @@
+dnl AMD K7 mpn_lshift -- mpn left shift.
+dnl
+dnl K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 4 1.51
+dnl 8 1.26
+dnl 16 1.21
+dnl 32 1.2
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size left by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the right. The bits shifted out at the left are
+C the return value.
+C
+C The comments in mpn_rshift apply here too.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 10)
+',`
+deflit(UNROLL_THRESHOLD, 10)
+')
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EDI, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+deflit(SAVE_SIZE, 12)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_SRC, %edx
+ subl $SAVE_SIZE, %esp
+deflit(`FRAME',SAVE_SIZE)
+
+ movl PARAM_SHIFT, %ecx
+ movl %edi, SAVE_EDI
+
+ movl PARAM_DST, %edi
+ decl %eax
+ jnz L(more_than_one_limb)
+
+ movl (%edx), %edx
+
+ shldl( %cl, %edx, %eax) C eax was decremented to zero
+
+ shll %cl, %edx
+
+ movl %edx, (%edi)
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(more_than_one_limb):
+ C eax size-1
+ C ebx
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ movd PARAM_SHIFT, %mm6
+ movd (%edx,%eax,4), %mm5 C src high limb
+ cmp $UNROLL_THRESHOLD-1, %eax
+
+ jae L(unroll)
+ negl %ecx
+ movd (%edx), %mm4 C src low limb
+
+ addl $32, %ecx
+
+ movd %ecx, %mm7
+
+L(simple_top):
+ C eax loop counter, limbs
+ C ebx
+ C ecx
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+ C
+ C mm0 scratch
+ C mm4 src low limb
+ C mm5 src high limb
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%edx,%eax,4), %mm0
+ decl %eax
+
+ psrlq %mm7, %mm0
+
+ movd %mm0, 4(%edi,%eax,4)
+ jnz L(simple_top)
+
+
+ psllq %mm6, %mm5
+ psllq %mm6, %mm4
+
+ psrlq $32, %mm5
+ movd %mm4, (%edi) C dst low limb
+
+ movd %mm5, %eax C return value
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx (saved)
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+ C
+ C mm5 src high limb, for return value
+ C mm6 lshift
+
+ movl %esi, SAVE_ESI
+ movl %ebx, SAVE_EBX
+ leal -4(%edx,%eax,4), %edx C &src[size-2]
+
+ testb $4, %dl
+ movq (%edx), %mm1 C src high qword
+
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process high limb (marked xxx) separately to
+ C make it so
+ C
+ C source -4(edx,%eax,4)
+ C |
+ C +-------+-------+-------+--
+ C | xxx |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+ C
+ C dest -4(edi,%eax,4)
+ C |
+ C +-------+-------+--
+ C | xxx | |
+ C +-------+-------+--
+
+ psllq %mm6, %mm1
+ subl $4, %edx
+ movl %eax, PARAM_SIZE C size-1
+
+ psrlq $32, %mm1
+ decl %eax C size-2 is new size-1
+
+ movd %mm1, 4(%edi,%eax,4)
+ movq (%edx), %mm1 C new src high qword
+L(start_src_aligned):
+
+
+ leal -4(%edi,%eax,4), %edi C &dst[size-2]
+ psllq %mm6, %mm5
+
+ testl $4, %edi
+ psrlq $32, %mm5 C return value
+
+ jz L(start_dst_aligned)
+
+
+ C dst isn't aligned, subtract 4 bytes to make it so, and pretend the
+ C shift is 32 bits extra. High limb of dst (marked xxx) handled
+ C here separately.
+ C
+ C source %edx
+ C +-------+-------+--
+ C | mm1 |
+ C +-------+-------+--
+ C 0mod8 4mod8
+ C
+ C dest %edi
+ C +-------+-------+-------+--
+ C | xxx |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+
+ movq %mm1, %mm0
+ psllq %mm6, %mm1
+ addl $32, %ecx C shift+32
+
+ psrlq $32, %mm1
+
+ movd %mm1, 4(%edi)
+ movq %mm0, %mm1
+ subl $4, %edi
+
+ movd %ecx, %mm6 C new lshift
+L(start_dst_aligned):
+
+ decl %eax C size-2, two last limbs handled at end
+ movq %mm1, %mm2 C copy of src high qword
+ negl %ecx
+
+ andl $-2, %eax C round size down to even
+ addl $64, %ecx
+
+ movl %eax, %ebx
+ negl %eax
+
+ andl $UNROLL_MASK, %eax
+ decl %ebx
+
+ shll %eax
+
+ movd %ecx, %mm7 C rshift = 64-lshift
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%eax,%eax,4), %esi
+')
+ shrl $UNROLL_LOG2, %ebx C loop counter
+
+ leal ifelse(UNROLL_BYTES,256,128) -8(%edx,%eax,2), %edx
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi
+ movl PARAM_SIZE, %eax C for use at end
+ jmp *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%eax,%eax,4), %esi
+ addl $L(entry)-L(here), %esi
+ addl (%esp), %esi
+
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(top):
+ C eax size (for use at end)
+ C ebx loop counter
+ C ecx rshift
+ C edx src
+ C esi computed jump
+ C edi dst
+ C ebp
+ C
+ C mm0 scratch
+ C mm1 \ carry (alternating, mm2 first)
+ C mm2 /
+ C mm6 lshift
+ C mm7 rshift
+ C
+ C 10 code bytes/limb
+ C
+ C The two chunks differ in whether mm1 or mm2 hold the carry.
+ C The computed jump puts the initial carry in both mm1 and mm2.
+
+L(entry):
+deflit(CHUNK_COUNT, 4)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 - 8))
+
+ movq disp0(%edx), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm2, %mm0
+ movq %mm0, disp0(%edi)
+
+
+ movq disp1(%edx), %mm0
+ psllq %mm6, %mm1
+
+ movq %mm0, %mm2
+ psrlq %mm7, %mm0
+
+ por %mm1, %mm0
+ movq %mm0, disp1(%edi)
+')
+
+ subl $UNROLL_BYTES, %edx
+ subl $UNROLL_BYTES, %edi
+ decl %ebx
+
+ jns L(top)
+
+
+
+define(`disp', `m4_empty_if_zero(eval($1 ifelse(UNROLL_BYTES,256,-128)))')
+
+L(end):
+ testb $1, %al
+ movl SAVE_EBX, %ebx
+ psllq %mm6, %mm2 C wanted left shifted in all cases below
+
+ movd %mm5, %eax
+
+ movl SAVE_ESI, %esi
+ jz L(end_even)
+
+
+L(end_odd):
+
+ C Size odd, destination was aligned.
+ C
+ C source edx+8 edx+4
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edi
+ C --+---------------+---------------+-------+
+ C | written | | |
+ C --+---------------+---------------+-------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size odd, destination was unaligned.
+ C
+ C source edx+8 edx+4
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edi
+ C --+---------------+---------------+
+ C | written | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword at (%edi), and in the aligned case
+ C there's an extra limb of dst to be formed from that extra src limb
+ C left shifted.
+
+ movd disp(4) (%edx), %mm0
+ testb $32, %cl
+
+ movq %mm0, %mm1
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+ psllq %mm6, %mm1
+
+ por %mm2, %mm0
+
+ movq %mm0, disp(0) (%edi)
+ jz L(end_odd_unaligned)
+ movd %mm1, disp(-4) (%edi)
+L(end_odd_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+L(end_even):
+
+ C Size even, destination was aligned.
+ C
+ C source edx+8
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edi
+ C --+---------------+---------------+
+ C | written | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size even, destination was unaligned.
+ C
+ C source edx+8
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edi+4
+ C --+---------------+-------+
+ C | written | |
+ C --+---------------+-------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C The movq for the aligned case overwrites the movd for the
+ C unaligned case.
+
+ movq %mm2, %mm0
+ psrlq $32, %mm2
+
+ testb $32, %cl
+ movd %mm2, disp(4) (%edi)
+
+ jz L(end_even_unaligned)
+ movq %mm0, disp(0) (%edi)
+L(end_even_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/mod_1.asm b/rts/gmp/mpn/x86/k7/mmx/mod_1.asm
new file mode 100644
index 0000000000..545ca56ddf
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/mod_1.asm
@@ -0,0 +1,457 @@
+dnl AMD K7 mpn_mod_1 -- mpn by limb remainder.
+dnl
+dnl K7: 17.0 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C The code here is the same as mpn_divrem_1, but with the quotient
+C discarded. See mpn/x86/k7/mmx/divrem_1.c for some comments.
+
+
+dnl MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+dnl
+dnl The inverse takes about 50 cycles to calculate, but after that the
+dnl multiply is 17 c/l versus division at 41 c/l.
+dnl
+dnl Using mul or div is about the same speed at 3 limbs, so the threshold
+dnl is set to 4 to get the smaller div code used at 3.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC_STOP,-28)
+
+deflit(STACK_SPACE, 28)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ jmp LF(mpn_mod_1,start_1c)
+
+EPILOGUE()
+
+
+ ALIGN(32)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C carry flag if high<divisor
+
+ cmovc( %eax, %edx) C src high limb as initial carry
+ sbbl $0, %ecx C size-1 to skip one div
+ jz L(divide_done)
+
+
+ ALIGN(16)
+L(start_1c):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ cmpl $MUL_THRESHOLD, %ecx
+ jae L(mul_by_inverse)
+
+
+
+C With a MUL_THRESHOLD of 4, this "loop" only ever does 1 to 3 iterations,
+C but it's already fast and compact, and there's nothing to gain by
+C expanding it out.
+C
+C Using PARAM_DIVISOR in the divl is a couple of cycles faster than %ebp.
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl PARAM_DIVISOR
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ movl %edx, %eax
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ bsrl %ebp, %eax C 31-l
+
+ movl %ebx, SAVE_EBX
+ leal -4(%esi), %ebx
+
+ movl %ebx, VAR_SRC_STOP
+ movl %edi, SAVE_EDI
+
+ movl %ecx, %ebx C size
+ movl $31, %ecx
+
+ movl %edx, %edi C carry
+ movl $-1, %edx
+
+ C
+
+ xorl %eax, %ecx C l
+ incl %eax C 32-l
+
+ shll %cl, %ebp C d normalized
+ movl %ecx, VAR_NORM
+
+ movd %eax, %mm7
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ C
+
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl 8(%eax), %esi C src high limb
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ movl %eax, %ecx C &src[size-3]
+
+
+ifelse(MUL_THRESHOLD,2,`
+ cmpl $2, %ebx
+ je L(inverse_two_left)
+')
+
+
+C The dependent chain here is the same as in mpn_divrem_1, but a few
+C instructions are saved by not needing to store the quotient limbs.
+C Unfortunately this doesn't get the code down to the theoretical 16 c/l.
+C
+C There's four dummy instructions in the loop, all of which are necessary
+C for the claimed 17 c/l. It's a 1 to 3 cycle slowdown if any are removed,
+C or changed from load to store or vice versa. They're not completely
+C random, since they correspond to what mpn_divrem_1 has, but there's no
+C obvious reason why they're necessary. Presumably they induce something
+C good in the out of order execution, perhaps through some load/store
+C ordering and/or decoding effects.
+C
+C The q1==0xFFFFFFFF case is handled here the same as in mpn_divrem_1. On
+C on special data that comes out as q1==0xFFFFFFFF always, the loop runs at
+C about 13.5 c/l.
+
+ ALIGN(32)
+L(inverse_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx src pointer, decrementing
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl PARAM_SIZE, %ebx C dummy
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next src limb and the one below it
+ subl $4, %ecx
+
+ movl %ecx, PARAM_SIZE C dummy
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+ nop C dummy
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+ leal 0(%ecx), %ecx C dummy
+
+ C
+
+ C
+
+ subl %eax, %esi
+ movl VAR_SRC_STOP, %eax
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ cmpl %eax, %ecx
+ jne L(inverse_top)
+
+
+L(inverse_loop_done):
+
+
+C -----------------------------------------------------------------------------
+
+L(inverse_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx &src[-1]
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src dword)
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd 4(%ecx), %mm0 C src low limb
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+
+
+C One limb left
+
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movl VAR_NORM, %ecx C for final denorm
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ movl SAVE_EBX, %ebx
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ movl %esi, %eax C remainder
+ movl SAVE_ESI, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ leal (%ebp,%eax), %edx
+ movl SAVE_EBP, %ebp
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ movl SAVE_EDI, %edi
+
+ shrl %cl, %eax C denorm remainder
+ addl $STACK_SPACE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx src pointer
+ C edx
+ C esi n10
+ C edi (n2)
+ C ebp divisor
+
+ movl VAR_SRC_STOP, %edx
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ psrlq %mm7, %mm0
+
+ movd %mm0, %esi C next n10
+
+ cmpl %ecx, %edx
+ jne L(inverse_top)
+ jmp L(inverse_loop_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/popham.asm b/rts/gmp/mpn/x86/k7/mmx/popham.asm
new file mode 100644
index 0000000000..fa7c8c04a5
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/popham.asm
@@ -0,0 +1,239 @@
+dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming
+dnl distance.
+dnl
+dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on
+dnl FreeBSD 3.3 and 3.4 doesn't recognise it.
+
+define(psadbw_mm4_mm0,
+`ifelse(m4_ifdef_anyof_p(`HAVE_TARGET_CPU_athlon',
+ `HAVE_TARGET_CPU_pentium3'),1,
+ `.byte 0x0f,0xf6,0xc4 C psadbw %mm4, %mm0',
+
+`m4_warning(`warning, using simulated and only partly functional psadbw, use for testing only
+') C this works enough for the sum of bytes done below, making it
+ C possible to test on an older cpu
+ leal -8(%esp), %esp
+ movq %mm4, (%esp)
+ movq %mm0, %mm4
+forloop(i,1,7,
+` psrlq $ 8, %mm4
+ paddb %mm4, %mm0
+')
+ pushl $ 0
+ pushl $ 0xFF
+ pand (%esp), %mm0
+ movq 8(%esp), %mm4
+ leal 16(%esp), %esp
+')')
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C The code here is almost certainly not optimal, but is already a 3x speedup
+C over the generic C code. The main improvement would be to interleave
+C processing of two qwords in the loop so as to fully exploit the available
+C execution units, possibly leading to 3.25 c/l (13 cycles for 4 limbs).
+C
+C The loop is based on the example "Efficient 64-bit population count using
+C MMX instructions" in the Athlon Optimization Guide, AMD document 22007,
+C page 158 of rev E (reference in mpn/x86/k7/README).
+
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist defined
+')')')
+
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+
+HAM(`
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+ifdef(`PIC',,`
+ dnl non-PIC
+
+ DATA
+ ALIGN(8)
+
+define(LS,
+m4_assert_numargs(1)
+`LF(M4_function,`$1')')
+
+LS(rodata_AAAAAAAAAAAAAAAA):
+ .long 0xAAAAAAAA
+ .long 0xAAAAAAAA
+
+LS(rodata_3333333333333333):
+ .long 0x33333333
+ .long 0x33333333
+
+LS(rodata_0F0F0F0F0F0F0F0F):
+ .long 0x0F0F0F0F
+ .long 0x0F0F0F0F
+')
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ orl %ecx, %ecx
+ jz L(zero)
+
+ifdef(`PIC',`
+ movl $0xAAAAAAAA, %eax
+ movl $0x33333333, %edx
+
+ movd %eax, %mm7
+ movd %edx, %mm6
+
+ movl $0x0F0F0F0F, %eax
+
+ punpckldq %mm7, %mm7
+ punpckldq %mm6, %mm6
+
+ movd %eax, %mm5
+ movd %edx, %mm4
+
+ punpckldq %mm5, %mm5
+
+',`
+ movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7
+ movq LS(rodata_3333333333333333), %mm6
+ movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5
+')
+ pxor %mm4, %mm4
+
+define(REG_AAAAAAAAAAAAAAAA,%mm7)
+define(REG_3333333333333333,%mm6)
+define(REG_0F0F0F0F0F0F0F0F,%mm5)
+define(REG_0000000000000000,%mm4)
+
+
+ movl PARAM_SRC, %eax
+HAM(` movl PARAM_SRC2, %edx')
+
+ pxor %mm2, %mm2 C total
+
+ shrl %ecx
+ jnc L(top)
+
+ movd (%eax,%ecx,8), %mm1
+
+HAM(` movd 0(%edx,%ecx,8), %mm0
+ pxor %mm0, %mm1
+')
+ orl %ecx, %ecx
+ jmp L(loaded)
+
+
+ ALIGN(16)
+L(top):
+ C eax src
+ C ebx
+ C ecx counter, qwords, decrementing
+ C edx [hamdist] src2
+ C
+ C mm0 (scratch)
+ C mm1 (scratch)
+ C mm2 total (low dword)
+ C mm3
+ C mm4 \
+ C mm5 | special constants
+ C mm6 |
+ C mm7 /
+
+ movq -8(%eax,%ecx,8), %mm1
+
+HAM(` pxor -8(%edx,%ecx,8), %mm1')
+ decl %ecx
+
+L(loaded):
+ movq %mm1, %mm0
+ pand REG_AAAAAAAAAAAAAAAA, %mm1
+
+ psrlq $1, %mm1
+
+ psubd %mm1, %mm0 C bit pairs
+
+
+ movq %mm0, %mm1
+ psrlq $2, %mm0
+
+ pand REG_3333333333333333, %mm0
+ pand REG_3333333333333333, %mm1
+
+ paddd %mm1, %mm0 C nibbles
+
+
+ movq %mm0, %mm1
+ psrlq $4, %mm0
+
+ pand REG_0F0F0F0F0F0F0F0F, %mm0
+ pand REG_0F0F0F0F0F0F0F0F, %mm1
+
+ paddd %mm1, %mm0 C bytes
+
+
+ psadbw_mm4_mm0
+
+ paddd %mm0, %mm2 C add to total
+ jnz L(top)
+
+
+ movd %mm2, %eax
+ emms
+ ret
+
+
+L(zero):
+ movl $0, %eax
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/rshift.asm b/rts/gmp/mpn/x86/k7/mmx/rshift.asm
new file mode 100644
index 0000000000..abb546cd5b
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/rshift.asm
@@ -0,0 +1,471 @@
+dnl AMD K7 mpn_rshift -- mpn right shift.
+dnl
+dnl K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 4 1.51
+dnl 8 1.26
+dnl 16 1.21
+dnl 32 1.2
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size right by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the left. The bits shifted out at the right are
+C the return value.
+C
+C This code uses 64-bit MMX operations, which makes it possible to handle
+C two limbs at a time, for a theoretical 1.0 cycles/limb. Plain integer
+C code, on the other hand, suffers from shrd being a vector path decode and
+C running at 3 cycles back-to-back.
+C
+C Full speed depends on source and destination being aligned, and some hairy
+C setups and finish-ups are done to arrange this for the loop.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 10)
+',`
+deflit(UNROLL_THRESHOLD, 10)
+')
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EDI, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+deflit(SAVE_SIZE, 12)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_SRC, %edx
+ subl $SAVE_SIZE, %esp
+deflit(`FRAME',SAVE_SIZE)
+
+ movl PARAM_SHIFT, %ecx
+ movl %edi, SAVE_EDI
+
+ movl PARAM_DST, %edi
+ decl %eax
+ jnz L(more_than_one_limb)
+
+ movl (%edx), %edx C src limb
+
+ shrdl( %cl, %edx, %eax) C eax was decremented to zero
+
+ shrl %cl, %edx
+
+ movl %edx, (%edi) C dst limb
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(more_than_one_limb):
+ C eax size-1
+ C ebx
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ movd PARAM_SHIFT, %mm6 C rshift
+ movd (%edx), %mm5 C src low limb
+ cmp $UNROLL_THRESHOLD-1, %eax
+
+ jae L(unroll)
+ leal (%edx,%eax,4), %edx C &src[size-1]
+ leal -4(%edi,%eax,4), %edi C &dst[size-2]
+
+ movd (%edx), %mm4 C src high limb
+ negl %eax
+
+
+L(simple_top):
+ C eax loop counter, limbs, negative
+ C ebx
+ C ecx shift
+ C edx carry
+ C edx &src[size-1]
+ C edi &dst[size-2]
+ C ebp
+ C
+ C mm0 scratch
+ C mm4 src high limb
+ C mm5 src low limb
+ C mm6 shift
+
+ movq (%edx,%eax,4), %mm0
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+ movd %mm0, (%edi,%eax,4)
+ jnz L(simple_top)
+
+
+ psllq $32, %mm5
+ psrlq %mm6, %mm4
+
+ psrlq %mm6, %mm5
+ movd %mm4, 4(%edi) C dst high limb
+
+ movd %mm5, %eax C return value
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+ C
+ C mm5 src low limb
+ C mm6 rshift
+
+ testb $4, %dl
+ movl %esi, SAVE_ESI
+ movl %ebx, SAVE_EBX
+
+ psllq $32, %mm5
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process low limb separately (marked xxx) and
+ C step src and dst by one limb, making src aligned.
+ C
+ C source edx
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+ C
+ C dest edi
+ C --+-------+-------+
+ C | | xxx |
+ C --+-------+-------+
+
+ movq (%edx), %mm0 C src low two limbs
+ addl $4, %edx
+ movl %eax, PARAM_SIZE C size-1
+
+ addl $4, %edi
+ decl %eax C size-2 is new size-1
+
+ psrlq %mm6, %mm0
+ movl %edi, PARAM_DST C new dst
+
+ movd %mm0, -4(%edi)
+L(start_src_aligned):
+
+
+ movq (%edx), %mm1 C src low two limbs
+ decl %eax C size-2, two last limbs handled at end
+ testl $4, %edi
+
+ psrlq %mm6, %mm5
+ jz L(start_dst_aligned)
+
+
+ C dst isn't aligned, add 4 to make it so, and pretend the shift is
+ C 32 bits extra. Low limb of dst (marked xxx) handled here separately.
+ C
+ C source edx
+ C --+-------+-------+
+ C | mm1 |
+ C --+-------+-------+
+ C 4mod8 0mod8
+ C
+ C dest edi
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+
+ movq %mm1, %mm0
+ psrlq %mm6, %mm1
+ addl $32, %ecx C shift+32
+
+ movd %mm1, (%edi)
+ movq %mm0, %mm1
+ addl $4, %edi C new dst
+
+ movd %ecx, %mm6
+L(start_dst_aligned):
+
+
+ movq %mm1, %mm2 C copy of src low two limbs
+ negl %ecx
+ andl $-2, %eax C round size down to even
+
+ movl %eax, %ebx
+ negl %eax
+ addl $64, %ecx
+
+ andl $UNROLL_MASK, %eax
+ decl %ebx
+
+ shll %eax
+
+ movd %ecx, %mm7 C lshift = 64-rshift
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%eax,%eax,4), %esi
+ negl %eax
+')
+ shrl $UNROLL_LOG2, %ebx C loop counter
+
+ leal ifelse(UNROLL_BYTES,256,128+) 8(%edx,%eax,2), %edx
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi
+ movl PARAM_SIZE, %eax C for use at end
+
+ jmp *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%eax,%eax,4), %esi
+ addl $L(entry)-L(here), %esi
+ addl (%esp), %esi
+ negl %eax
+
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(64)
+L(top):
+ C eax size, for use at end
+ C ebx loop counter
+ C ecx lshift
+ C edx src
+ C esi was computed jump
+ C edi dst
+ C ebp
+ C
+ C mm0 scratch
+ C mm1 \ carry (alternating)
+ C mm2 /
+ C mm6 rshift
+ C mm7 lshift
+ C
+ C 10 code bytes/limb
+ C
+ C The two chunks differ in whether mm1 or mm2 hold the carry.
+ C The computed jump puts the initial carry in both mm1 and mm2.
+
+L(entry):
+deflit(CHUNK_COUNT, 4)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 8))
+
+ movq disp0(%edx), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ por %mm2, %mm0
+ movq %mm0, disp0(%edi)
+
+
+ movq disp1(%edx), %mm0
+ psrlq %mm6, %mm1
+
+ movq %mm0, %mm2
+ psllq %mm7, %mm0
+
+ por %mm1, %mm0
+ movq %mm0, disp1(%edi)
+')
+
+ addl $UNROLL_BYTES, %edx
+ addl $UNROLL_BYTES, %edi
+ decl %ebx
+
+ jns L(top)
+
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 8))
+
+ testb $1, %al
+ psrlq %mm6, %mm2 C wanted rshifted in all cases below
+ movl SAVE_ESI, %esi
+
+ movd %mm5, %eax C return value
+
+ movl SAVE_EBX, %ebx
+ jz L(end_even)
+
+
+ C Size odd, destination was aligned.
+ C
+ C source
+ C edx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edi
+ C +-------+---------------+---------------+--
+ C | | | written |
+ C +-------+---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size odd, destination was unaligned.
+ C
+ C source
+ C edx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edi
+ C +---------------+---------------+--
+ C | | written |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword to store, and in the aligned case there's
+ C a further extra limb of dst to be formed.
+
+
+ movd disp0(%edx), %mm0
+ movq %mm0, %mm1
+
+ psllq %mm7, %mm0
+ testb $32, %cl
+
+ por %mm2, %mm0
+ psrlq %mm6, %mm1
+
+ movq %mm0, disp0(%edi)
+ jz L(finish_odd_unaligned)
+
+ movd %mm1, disp1(%edi)
+L(finish_odd_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+L(end_even):
+
+ C Size even, destination was aligned.
+ C
+ C source
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edi
+ C +---------------+---------------+--
+ C | | mm3 |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size even, destination was unaligned.
+ C
+ C source
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edi
+ C +-------+---------------+--
+ C | | mm3 |
+ C +-------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = 64-(shift+32)
+
+
+ C The movd for the unaligned case is the same data as the movq for
+ C the aligned case, it's just a choice between whether one or two
+ C limbs should be written.
+
+
+ testb $32, %cl
+ movd %mm2, disp0(%edi)
+
+ jz L(end_even_unaligned)
+
+ movq %mm2, disp0(%edi)
+L(end_even_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mul_1.asm b/rts/gmp/mpn/x86/k7/mul_1.asm
new file mode 100644
index 0000000000..07f7085b10
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mul_1.asm
@@ -0,0 +1,265 @@
+dnl AMD K7 mpn_mul_1 -- mpn by limb multiply.
+dnl
+dnl K7: 3.4 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 8 3.9
+dnl 16 3.4
+dnl 32 3.4
+dnl 64 3.35
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier, mp_limb_t carry);
+C
+C Multiply src,size by mult and store the result in dst,size.
+C Return the carry limb from the top of the result.
+C
+C mpn_mul_1c() accepts an initial carry for the calculation, it's added into
+C the low limb of the destination.
+C
+C Variations on the unrolled loop have been tried, with the current
+C registers or with the counter on the stack to free up ecx. The current
+C code is the fastest found.
+C
+C An interesting effect is that removing the stores "movl %ebx, disp0(%edi)"
+C from the unrolled loop actually slows it down to 5.0 cycles/limb. Code
+C with this change can be tested on sizes of the form UNROLL_COUNT*n+1
+C without having to change the computed jump. There's obviously something
+C fishy going on, perhaps with what execution units the mul needs.
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBP, -4)
+defframe(SAVE_EDI, -8)
+defframe(SAVE_ESI, -12)
+defframe(SAVE_EBX, -16)
+deflit(STACK_SPACE, 16)
+
+dnl Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 7)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_mul_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ jmp LF(mpn_mul_1,start_nc)
+EPILOGUE()
+
+
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+ xorl %edx, %edx C initial carry
+L(start_nc):
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME', STACK_SPACE)
+
+ movl %edi, SAVE_EDI
+ movl %ebx, SAVE_EBX
+ movl %edx, %ebx
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_DST, %edi
+ movl %ebp, SAVE_EBP
+ jae L(unroll)
+
+ leal (%esi,%ecx,4), %esi
+ leal (%edi,%ecx,4), %edi
+ negl %ecx
+
+ movl PARAM_MULTIPLIER, %ebp
+
+L(simple):
+ C eax scratch
+ C ebx carry
+ C ecx counter (negative)
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl %eax, (%edi,%ecx,4)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ incl %ecx
+ jnz L(simple)
+
+ movl %ebx, %eax
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C The mov to load the next source limb is done well ahead of the mul, this
+C is necessary for full speed. It leads to one limb handled separately
+C after the loop.
+C
+C When unrolling to 32 or more, an offset of +4 is used on the src pointer,
+C to avoid having an 0x80 displacement in the code for the last limb in the
+C unrolled loop. This is for a fair comparison between 16 and 32 unrolling.
+
+ifelse(eval(UNROLL_COUNT >= 32),1,`
+deflit(SRC_OFFSET,4)
+',`
+deflit(SRC_OFFSET,)
+')
+
+ C this is offset 0x62, so close enough to aligned
+L(unroll):
+ C eax
+ C ebx initial carry
+ C ecx size
+ C edx
+ C esi src
+ C edi dst
+ C ebp
+deflit(`FRAME', STACK_SPACE)
+
+ leal -1(%ecx), %edx C one limb handled at end
+ leal -2(%ecx), %ecx C and ecx is one less than edx
+ movl %ebp, SAVE_EBP
+
+ negl %edx
+ shrl $UNROLL_LOG2, %ecx C unrolled loop counter
+ movl (%esi), %eax C src low limb
+
+ andl $UNROLL_MASK, %edx
+ movl PARAM_DST, %edi
+
+ movl %edx, %ebp
+ shll $4, %edx
+
+ C 17 code bytes per limb
+ifdef(`PIC',`
+ call L(add_eip_to_edx)
+L(here):
+',`
+ leal L(entry) (%edx,%ebp), %edx
+')
+ negl %ebp
+
+ leal ifelse(UNROLL_BYTES,256,128+) SRC_OFFSET(%esi,%ebp,4), %esi
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%ebp,4), %edi
+ movl PARAM_MULTIPLIER, %ebp
+
+ jmp *%edx
+
+
+ifdef(`PIC',`
+L(add_eip_to_edx):
+ C See README.family about old gas bugs
+ leal (%edx,%ebp), %edx
+ addl $L(entry)-L(here), %edx
+ addl (%esp), %edx
+ ret
+')
+
+
+C ----------------------------------------------------------------------------
+ ALIGN(32)
+L(top):
+ C eax next src limb
+ C ebx carry
+ C ecx counter
+ C edx scratch
+ C esi src+4
+ C edi dst
+ C ebp multiplier
+ C
+ C 17 code bytes per limb processed
+
+L(entry):
+forloop(i, 0, UNROLL_COUNT-1, `
+ deflit(`disp_dst', eval(i*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp_src', eval(disp_dst + 4-(SRC_OFFSET-0)))
+
+ mull %ebp
+
+ addl %eax, %ebx
+Zdisp( movl, disp_src,(%esi), %eax)
+Zdisp( movl, %ebx, disp_dst,(%edi))
+
+ movl $0, %ebx
+ adcl %edx, %ebx
+')
+
+ decl %ecx
+
+ leal UNROLL_BYTES(%esi), %esi
+ leal UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+
+ mull %ebp
+
+ addl %eax, %ebx
+ movl $0, %eax
+ movl SAVE_ESI, %esi
+
+ movl %ebx, disp0(%edi)
+ movl SAVE_EBX, %ebx
+ movl SAVE_EDI, %edi
+
+ adcl %edx, %eax
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mul_basecase.asm b/rts/gmp/mpn/x86/k7/mul_basecase.asm
new file mode 100644
index 0000000000..c4be62e633
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mul_basecase.asm
@@ -0,0 +1,593 @@
+dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers.
+dnl
+dnl K7: approx 4.42 cycles per cross product at around 20x20 limbs (16
+dnl limbs/loop unrolling).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7 UNROLL_COUNT cycles/product (at around 20x20)
+dnl 8 4.67
+dnl 16 4.59
+dnl 32 4.42
+dnl Maximum possible with the current code is 32.
+dnl
+dnl At 32 the typical 13-26 limb sizes from the karatsuba code will get
+dnl done with a straight run through a block of code, no inner loop. Using
+dnl 32 gives 1k of code, but the k7 has a 64k L1 code cache.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+C
+C Calculate xp,xsize multiplied by yp,ysize, storing the result in
+C wp,xsize+ysize.
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() startup
+C calculations only once. The saving is 15-25% on typical sizes coming from
+C the Karatsuba multiply code.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_XSIZE, %ecx
+ movl PARAM_YP, %eax
+
+ movl PARAM_XP, %edx
+ movl (%eax), %eax C yp low limb
+
+ cmpl $2, %ecx
+ ja L(xsize_more_than_two)
+ je L(two_by_something)
+
+
+ C one limb by one limb
+
+ mull (%edx)
+
+ movl PARAM_WP, %ecx
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+deflit(`FRAME',0)
+ decl PARAM_YSIZE
+ pushl %ebx defframe_pushl(`SAVE_EBX')
+ movl %eax, %ecx C yp low limb
+
+ movl PARAM_WP, %ebx
+ pushl %esi defframe_pushl(`SAVE_ESI')
+ movl %edx, %esi C xp
+
+ movl (%edx), %eax C xp low limb
+ jnz L(two_by_two)
+
+
+ C two limbs by one limb
+
+ mull %ecx
+
+ movl %eax, (%ebx)
+ movl 4(%esi), %eax
+ movl %edx, %esi C carry
+
+ mull %ecx
+
+ addl %eax, %esi
+
+ movl %esi, 4(%ebx)
+ movl SAVE_ESI, %esi
+
+ adcl $0, %edx
+
+ movl %edx, 8(%ebx)
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+C Could load yp earlier into another register.
+
+ ALIGN(16)
+L(two_by_two):
+ C eax xp low limb
+ C ebx wp
+ C ecx yp low limb
+ C edx
+ C esi xp
+ C edi
+ C ebp
+
+dnl FRAME carries on from previous
+
+ mull %ecx C xp[0] * yp[0]
+
+ push %edi defframe_pushl(`SAVE_EDI')
+ movl %edx, %edi C carry, for wp[1]
+
+ movl %eax, (%ebx)
+ movl 4(%esi), %eax
+
+ mull %ecx C xp[1] * yp[0]
+
+ addl %eax, %edi
+ movl PARAM_YP, %ecx
+
+ adcl $0, %edx
+ movl 4(%ecx), %ecx C yp[1]
+ movl %edi, 4(%ebx)
+
+ movl 4(%esi), %eax C xp[1]
+ movl %edx, %edi C carry, for wp[2]
+
+ mull %ecx C xp[1] * yp[1]
+
+ addl %eax, %edi
+
+ adcl $0, %edx
+ movl (%esi), %eax C xp[0]
+
+ movl %edx, %esi C carry, for wp[3]
+
+ mull %ecx C xp[0] * yp[1]
+
+ addl %eax, 4(%ebx)
+ adcl %edx, %edi
+ movl %edi, 8(%ebx)
+
+ adcl $0, %esi
+ movl SAVE_EDI, %edi
+ movl %esi, 12(%ebx)
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(xsize_more_than_two):
+
+C The first limb of yp is processed with a simple mpn_mul_1 style loop
+C inline. Unrolling this doesn't seem worthwhile since it's only run once
+C (whereas the addmul below is run ysize-1 many times). A call to the
+C actual mpn_mul_1 will be slowed down by the call and parameter pushing and
+C popping, and doesn't seem likely to be worthwhile on the typical 13-26
+C limb operations the Karatsuba code calls here with.
+
+ C eax yp[0]
+ C ebx
+ C ecx xsize
+ C edx xp
+ C esi
+ C edi
+ C ebp
+
+dnl FRAME doesn't carry on from previous, no pushes yet here
+defframe(`SAVE_EBX',-4)
+defframe(`SAVE_ESI',-8)
+defframe(`SAVE_EDI',-12)
+defframe(`SAVE_EBP',-16)
+deflit(`FRAME',0)
+
+ subl $16, %esp
+deflit(`FRAME',16)
+
+ movl %edi, SAVE_EDI
+ movl PARAM_WP, %edi
+
+ movl %ebx, SAVE_EBX
+ movl %ebp, SAVE_EBP
+ movl %eax, %ebp
+
+ movl %esi, SAVE_ESI
+ xorl %ebx, %ebx
+ leal (%edx,%ecx,4), %esi C xp end
+
+ leal (%edi,%ecx,4), %edi C wp end of mul1
+ negl %ecx
+
+
+L(mul1):
+ C eax scratch
+ C ebx carry
+ C ecx counter, negative
+ C edx scratch
+ C esi xp end
+ C edi wp end of mul1
+ C ebp multiplier
+
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl %eax, (%edi,%ecx,4)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ incl %ecx
+ jnz L(mul1)
+
+
+ movl PARAM_YSIZE, %edx
+ movl PARAM_XSIZE, %ecx
+
+ movl %ebx, (%edi) C final carry
+ decl %edx
+
+ jnz L(ysize_more_than_one)
+
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+
+ movl SAVE_EBP, %ebp
+ movl SAVE_ESI, %esi
+ addl $FRAME, %esp
+
+ ret
+
+
+L(ysize_more_than_one):
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_YP, %eax
+
+ jae L(unroll)
+
+
+C -----------------------------------------------------------------------------
+ C simple addmul looping
+ C
+ C eax yp
+ C ebx
+ C ecx xsize
+ C edx ysize-1
+ C esi xp end
+ C edi wp end of mul1
+ C ebp
+
+ leal 4(%eax,%edx,4), %ebp C yp end
+ negl %ecx
+ negl %edx
+
+ movl (%esi,%ecx,4), %eax C xp low limb
+ movl %edx, PARAM_YSIZE C -(ysize-1)
+ incl %ecx
+
+ xorl %ebx, %ebx C initial carry
+ movl %ecx, PARAM_XSIZE C -(xsize-1)
+ movl %ebp, PARAM_YP
+
+ movl (%ebp,%edx,4), %ebp C yp second lowest limb - multiplier
+ jmp L(simple_outer_entry)
+
+
+ C this is offset 0x121 so close enough to aligned
+L(simple_outer_top):
+ C ebp ysize counter, negative
+
+ movl PARAM_YP, %edx
+ movl PARAM_XSIZE, %ecx C -(xsize-1)
+ xorl %ebx, %ebx C carry
+
+ movl %ebp, PARAM_YSIZE
+ addl $4, %edi C next position in wp
+
+ movl (%edx,%ebp,4), %ebp C yp limb - multiplier
+ movl -4(%esi,%ecx,4), %eax C xp low limb
+
+
+L(simple_outer_entry):
+
+L(simple_inner):
+ C eax xp limb
+ C ebx carry limb
+ C ecx loop counter (negative)
+ C edx scratch
+ C esi xp end
+ C edi wp end
+ C ebp multiplier
+
+ mull %ebp
+
+ addl %eax, %ebx
+ adcl $0, %edx
+
+ addl %ebx, (%edi,%ecx,4)
+ movl (%esi,%ecx,4), %eax
+ adcl $0, %edx
+
+ incl %ecx
+ movl %edx, %ebx
+ jnz L(simple_inner)
+
+
+ mull %ebp
+
+ movl PARAM_YSIZE, %ebp
+ addl %eax, %ebx
+
+ adcl $0, %edx
+ addl %ebx, (%edi)
+
+ adcl $0, %edx
+ incl %ebp
+
+ movl %edx, 4(%edi)
+ jnz L(simple_outer_top)
+
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The unrolled loop is the same as in mpn_addmul_1(), see that code for some
+C comments.
+C
+C VAR_ADJUST is the negative of how many limbs the leals in the inner loop
+C increment xp and wp. This is used to adjust back xp and wp, and rshifted
+C to given an initial VAR_COUNTER at the top of the outer loop.
+C
+C VAR_COUNTER is for the unrolled loop, running from VAR_ADJUST/UNROLL_COUNT
+C up to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C The trick with VAR_ADJUST means it's only necessary to do one fetch in the
+C outer loop to take care of xp, wp and the inner loop counter.
+
+defframe(VAR_COUNTER, -20)
+defframe(VAR_ADJUST, -24)
+defframe(VAR_JMP, -28)
+defframe(VAR_XP_LOW, -32)
+deflit(VAR_EXTRA_SPACE, 16)
+
+
+L(unroll):
+ C eax yp
+ C ebx
+ C ecx xsize
+ C edx ysize-1
+ C esi xp end
+ C edi wp end of mul1
+ C ebp
+
+ movl PARAM_XP, %esi
+ movl 4(%eax), %ebp C multiplier (yp second limb)
+ leal 4(%eax,%edx,4), %eax C yp adjust for ysize indexing
+
+ movl PARAM_WP, %edi
+ movl %eax, PARAM_YP
+ negl %edx
+
+ movl %edx, PARAM_YSIZE
+ leal UNROLL_COUNT-2(%ecx), %ebx C (xsize-1)+UNROLL_COUNT-1
+ decl %ecx C xsize-1
+
+ movl (%esi), %eax C xp low limb
+ andl $-UNROLL_MASK-1, %ebx
+ negl %ecx
+
+ subl $VAR_EXTRA_SPACE, %esp
+deflit(`FRAME',16+VAR_EXTRA_SPACE)
+ negl %ebx
+ andl $UNROLL_MASK, %ecx
+
+ movl %ebx, VAR_ADJUST
+ movl %ecx, %edx
+ shll $4, %ecx
+
+ sarl $UNROLL_LOG2, %ebx
+
+ C 17 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(unroll_here):
+',`
+ leal L(unroll_entry) (%ecx,%edx,1), %ecx
+')
+ negl %edx
+
+ movl %eax, VAR_XP_LOW
+ movl %ecx, VAR_JMP
+ leal 4(%edi,%edx,4), %edi C wp and xp, adjust for unrolling,
+ leal 4(%esi,%edx,4), %esi C and start at second limb
+ jmp L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%ecx,%edx,1), %ecx
+ addl $L(unroll_entry)-L(unroll_here), %ecx
+ addl (%esp), %ecx
+ ret
+')
+
+
+C --------------------------------------------------------------------------
+ ALIGN(32)
+L(unroll_outer_top):
+ C ebp ysize counter, negative
+
+ movl VAR_ADJUST, %ebx
+ movl PARAM_YP, %edx
+
+ movl VAR_XP_LOW, %eax
+ movl %ebp, PARAM_YSIZE C store incremented ysize counter
+
+ leal 4(%edi,%ebx,4), %edi
+ leal (%esi,%ebx,4), %esi
+ sarl $UNROLL_LOG2, %ebx
+
+ movl (%edx,%ebp,4), %ebp C yp next multiplier
+ movl VAR_JMP, %ecx
+
+L(unroll_outer_entry):
+ mull %ebp
+
+ testb $1, %cl C and clear carry bit
+ movl %ebx, VAR_COUNTER
+ movl $0, %ebx
+
+ movl $0, %ecx
+ cmovz( %eax, %ecx) C eax into low carry, zero into high carry limb
+ cmovnz( %eax, %ebx)
+
+ C Extra fetch of VAR_JMP is bad, but registers are tight
+ jmp *VAR_JMP
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(unroll_top):
+ C eax xp limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi xp+8
+ C edi wp
+ C ebp yp multiplier limb
+ C
+ C VAR_COUNTER loop counter, negative
+ C
+ C 17 bytes each limb
+
+L(unroll_entry):
+
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%esi), %eax)
+ adcl %edx, %ebx
+
+ mull %ebp
+
+Zdisp( addl, %ecx, disp0,(%edi))
+ movl $0, %ecx
+
+ adcl %eax, %ebx
+
+
+ movl disp1(%esi), %eax
+ adcl %edx, %ecx
+
+ mull %ebp
+
+ addl %ebx, disp1(%edi)
+ movl $0, %ebx
+
+ adcl %eax, %ecx
+')
+
+
+ incl VAR_COUNTER
+ leal UNROLL_BYTES(%esi), %esi
+ leal UNROLL_BYTES(%edi), %edi
+
+ jnz L(unroll_top)
+
+
+ C eax
+ C ebx zero
+ C ecx low
+ C edx high
+ C esi
+ C edi wp, pointing at second last limb)
+ C ebp
+ C
+ C carry flag to be added to high
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 4))
+
+ movl PARAM_YSIZE, %ebp
+ adcl $0, %edx
+ addl %ecx, disp0(%edi)
+
+ adcl $0, %edx
+ incl %ebp
+
+ movl %edx, disp1(%edi)
+ jnz L(unroll_outer_top)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/sqr_basecase.asm b/rts/gmp/mpn/x86/k7/sqr_basecase.asm
new file mode 100644
index 0000000000..84861ea66b
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/sqr_basecase.asm
@@ -0,0 +1,627 @@
+dnl AMD K7 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product
+dnl (measured on the speed difference between 25 and 50 limbs, which is
+dnl roughly the Karatsuba recursing range).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
+dnl some comments.
+
+deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66)
+
+ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',
+`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')
+
+m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')
+deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C With a KARATSUBA_SQR_THRESHOLD around 50 this code is about 1500 bytes,
+C which is quite a bit, but is considered good value since squares big
+C enough to use most of the code will be spending quite a few cycles in it.
+
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl PARAM_SRC, %eax
+ cmpl $2, %ecx
+
+ movl PARAM_DST, %edx
+ je L(two_limbs)
+ ja L(three_or_more)
+
+
+C------------------------------------------------------------------------------
+C one limb only
+ C eax src
+ C ecx size
+ C edx dst
+
+ movl (%eax), %eax
+ movl %edx, %ecx
+
+ mull %eax
+
+ movl %edx, 4(%ecx)
+ movl %eax, (%ecx)
+ ret
+
+
+C------------------------------------------------------------------------------
+C
+C Using the read/modify/write "add"s seems to be faster than saving and
+C restoring registers. Perhaps the loads for the first set hide under the
+C mul latency and the second gets store to load forwarding.
+
+ ALIGN(16)
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+deflit(`FRAME',0)
+
+ pushl %ebx FRAME_pushl()
+ movl %eax, %ebx C src
+ movl (%eax), %eax
+
+ movl %edx, %ecx C dst
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx) C dst[0]
+ movl 4(%ebx), %eax
+
+ movl %edx, 4(%ecx) C dst[1]
+
+ mull %eax C src[1]^2
+
+ movl %eax, 8(%ecx) C dst[2]
+ movl (%ebx), %eax
+
+ movl %edx, 12(%ecx) C dst[3]
+
+ mull 4(%ebx) C src[0]*src[1]
+
+ popl %ebx
+
+ addl %eax, 4(%ecx)
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+ ASSERT(nc)
+
+ addl %eax, 4(%ecx)
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+ ASSERT(nc)
+
+ ret
+
+
+C------------------------------------------------------------------------------
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(STACK_SPACE, 16)
+
+L(three_or_more):
+ subl $STACK_SPACE, %esp
+ cmpl $4, %ecx
+ jae L(four_or_more)
+deflit(`FRAME',STACK_SPACE)
+
+
+C------------------------------------------------------------------------------
+C Three limbs
+C
+C Writing out the loads and stores separately at the end of this code comes
+C out about 10 cycles faster than using adcls to memory.
+
+ C eax src
+ C ecx size
+ C edx dst
+
+ movl %ebx, SAVE_EBX
+ movl %eax, %ebx C src
+ movl (%eax), %eax
+
+ movl %edx, %ecx C dst
+ movl %esi, SAVE_ESI
+ movl %edi, SAVE_EDI
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl 4(%ebx), %eax
+ movl %edx, 4(%ecx)
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl 8(%ebx), %eax
+ movl %edx, 12(%ecx)
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl (%ebx), %eax
+ movl %edx, 20(%ecx)
+
+ mull 4(%ebx) C src[0] * src[1]
+
+ movl %eax, %esi
+ movl (%ebx), %eax
+ movl %edx, %edi
+
+ mull 8(%ebx) C src[0] * src[2]
+
+ addl %eax, %edi
+ movl %ebp, SAVE_EBP
+ movl $0, %ebp
+
+ movl 4(%ebx), %eax
+ adcl %edx, %ebp
+
+ mull 8(%ebx) C src[1] * src[2]
+
+ xorl %ebx, %ebx
+ addl %eax, %ebp
+
+ adcl $0, %edx
+
+ C eax
+ C ebx zero, will be dst[5]
+ C ecx dst
+ C edx dst[4]
+ C esi dst[1]
+ C edi dst[2]
+ C ebp dst[3]
+
+ adcl $0, %edx
+ addl %esi, %esi
+
+ adcl %edi, %edi
+ movl 4(%ecx), %eax
+
+ adcl %ebp, %ebp
+
+ adcl %edx, %edx
+
+ adcl $0, %ebx
+ addl %eax, %esi
+ movl 8(%ecx), %eax
+
+ adcl %eax, %edi
+ movl 12(%ecx), %eax
+ movl %esi, 4(%ecx)
+
+ adcl %eax, %ebp
+ movl 16(%ecx), %eax
+ movl %edi, 8(%ecx)
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EDI, %edi
+
+ adcl %eax, %edx
+ movl 20(%ecx), %eax
+ movl %ebp, 12(%ecx)
+
+ adcl %ebx, %eax
+ ASSERT(nc)
+ movl SAVE_EBX, %ebx
+ movl SAVE_EBP, %ebp
+
+ movl %edx, 16(%ecx)
+ movl %eax, 20(%ecx)
+ addl $FRAME, %esp
+
+ ret
+
+
+C------------------------------------------------------------------------------
+L(four_or_more):
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+C Further products are added in rather than stored.
+
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+ C esi
+ C edi
+ C ebp
+
+defframe(`VAR_COUNTER',-20)
+defframe(`VAR_JMP', -24)
+deflit(EXTRA_STACK_SPACE, 8)
+
+ movl %ebx, SAVE_EBX
+ movl %edi, SAVE_EDI
+ leal (%edx,%ecx,4), %edi C &dst[size]
+
+ movl %esi, SAVE_ESI
+ movl %ebp, SAVE_EBP
+ leal (%eax,%ecx,4), %esi C &src[size]
+
+ movl (%eax), %ebp C multiplier
+ movl $0, %ebx
+ decl %ecx
+
+ negl %ecx
+ subl $EXTRA_STACK_SPACE, %esp
+FRAME_subl_esp(EXTRA_STACK_SPACE)
+
+L(mul_1):
+ C eax scratch
+ C ebx carry
+ C ecx counter
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp multiplier
+
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl %eax, (%edi,%ecx,4)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ incl %ecx
+ jnz L(mul_1)
+
+
+C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two products, which are the bottom right corner of the product
+C triangle, are left to the end. These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1]. If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as in mpn_addmul_1, see that routine for
+C some comments.
+C
+C VAR_COUNTER is the outer loop, running from -size+4 to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+C
+C K7 does branch prediction on indirect jumps, which is bad since it's a
+C different target each time. There seems no way to avoid this.
+
+dnl This value also hard coded in some shifts and adds
+deflit(CODE_BYTES_PER_LIMB, 17)
+
+dnl With the unmodified &src[size] and &dst[size] pointers, the
+dnl displacements in the unrolled code fit in a byte for UNROLL_COUNT
+dnl values up to 31, but above that an offset must be added to them.
+
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>31),1,
+eval((UNROLL_COUNT-31)*4),
+0))
+
+dnl Because the last chunk of code is generated differently, a label placed
+dnl at the end doesn't work. Instead calculate the implied end using the
+dnl start and how many chunks of code there are.
+
+deflit(UNROLL_INNER_END,
+`L(unroll_inner_start)+eval(UNROLL_COUNT*CODE_BYTES_PER_LIMB)')
+
+ C eax
+ C ebx carry
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+ movl %ebx, (%edi)
+
+ subl $4, %ecx
+ jz L(corner)
+
+ negl %ecx
+ifelse(OFFSET,0,,`subl $OFFSET, %edi')
+ifelse(OFFSET,0,,`subl $OFFSET, %esi')
+
+ movl %ecx, %edx
+ shll $4, %ecx
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+
+
+ C The calculated jump mustn't come out to before the start of the
+ C code available. This is the limit UNROLL_COUNT puts on the src
+ C operand size, but checked here directly using the jump address.
+ ASSERT(ae,
+ `movl_text_address(L(unroll_inner_start), %eax)
+ cmpl %eax, %ecx')
+
+
+C------------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll_outer_top):
+ C eax
+ C ebx high limb to store
+ C ecx VAR_JMP
+ C edx VAR_COUNTER, limbs, negative
+ C esi &src[size], constant
+ C edi dst ptr, high of last addmul
+ C ebp
+
+ movl -12+OFFSET(%esi,%edx,4), %ebp C next multiplier
+ movl -8+OFFSET(%esi,%edx,4), %eax C first of multiplicand
+
+ movl %edx, VAR_COUNTER
+
+ mull %ebp
+
+define(cmovX,`ifelse(eval(UNROLL_COUNT%2),0,`cmovz($@)',`cmovnz($@)')')
+
+ testb $1, %cl
+ movl %edx, %ebx C high carry
+ movl %ecx, %edx C jump
+
+ movl %eax, %ecx C low carry
+ cmovX( %ebx, %ecx) C high carry reverse
+ cmovX( %eax, %ebx) C low carry reverse
+
+ leal CODE_BYTES_PER_LIMB(%edx), %eax
+ xorl %edx, %edx
+ leal 4(%edi), %edi
+
+ movl %eax, VAR_JMP
+
+ jmp *%eax
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ addl (%esp), %ecx
+ addl $UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)-L(here), %ecx
+ addl %edx, %ecx
+ ret
+')
+
+
+ C Must be an even address to preserve the significance of the low
+ C bit of the jump address indicating which way around ecx/ebx should
+ C start.
+ ALIGN(2)
+
+L(unroll_inner_start):
+ C eax next limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+forloop(`i', UNROLL_COUNT, 1, `
+ deflit(`disp_src', eval(-i*4 + OFFSET))
+ deflit(`disp_dst', eval(disp_src - 4))
+
+ m4_assert(`disp_src>=-128 && disp_src<128')
+ m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl, disp_src,(%esi), %eax)
+ adcl %edx, %ebx
+
+ mull %ebp
+
+Zdisp( addl, %ecx, disp_dst,(%edi))
+ movl $0, %ecx
+
+ adcl %eax, %ebx
+
+',`
+ dnl this bit comes out last
+Zdisp( movl, disp_src,(%esi), %eax)
+ adcl %edx, %ecx
+
+ mull %ebp
+
+dnl Zdisp( addl %ebx, disp_src,(%edi))
+ addl %ebx, disp_dst(%edi)
+ifelse(forloop_last,0,
+` movl $0, %ebx')
+
+ adcl %eax, %ecx
+')
+')
+
+ C eax next limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ adcl $0, %edx
+ addl %ecx, -4+OFFSET(%edi)
+ movl VAR_JMP, %ecx
+
+ adcl $0, %edx
+
+ movl %edx, m4_empty_if_zero(OFFSET) (%edi)
+ movl VAR_COUNTER, %edx
+
+ incl %edx
+ jnz L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+ addl $OFFSET, %esi
+ addl $OFFSET, %edi
+')
+
+
+C------------------------------------------------------------------------------
+L(corner):
+ C esi &src[size]
+ C edi &dst[2*size-5]
+
+ movl -12(%esi), %ebp
+ movl -8(%esi), %eax
+ movl %eax, %ecx
+
+ mull %ebp
+
+ addl %eax, -4(%edi)
+ movl -4(%esi), %eax
+
+ adcl $0, %edx
+ movl %edx, %ebx
+ movl %eax, %esi
+
+ mull %ebp
+
+ addl %ebx, %eax
+
+ adcl $0, %edx
+ addl %eax, (%edi)
+ movl %esi, %eax
+
+ adcl $0, %edx
+ movl %edx, %ebx
+
+ mull %ecx
+
+ addl %ebx, %eax
+ movl %eax, 4(%edi)
+
+ adcl $0, %edx
+ movl %edx, 8(%edi)
+
+
+
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+L(lshift_start):
+ movl PARAM_SIZE, %eax
+ movl PARAM_DST, %edi
+ xorl %ecx, %ecx C clear carry
+
+ leal (%edi,%eax,8), %edi
+ notl %eax C -size-1, preserve carry
+
+ leal 2(%eax), %eax C -(size-1)
+
+L(lshift):
+ C eax counter, negative
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi dst, pointing just after last limb
+ C ebp
+
+ rcll -4(%edi,%eax,8)
+ rcll (%edi,%eax,8)
+ incl %eax
+ jnz L(lshift)
+
+ setc %al
+
+ movl PARAM_SRC, %esi
+ movl %eax, -4(%edi) C dst most significant limb
+
+ movl PARAM_SIZE, %ecx
+
+
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+ movl (%esi), %eax C src[0]
+
+ mull %eax
+
+ leal (%esi,%ecx,4), %esi C src point just after last limb
+ negl %ecx
+
+ movl %eax, (%edi,%ecx,8) C dst[0]
+ incl %ecx
+
+L(diag):
+ C eax scratch
+ C ebx scratch
+ C ecx counter, negative
+ C edx carry
+ C esi src just after last limb
+ C edi dst just after last limb
+ C ebp
+
+ movl (%esi,%ecx,4), %eax
+ movl %edx, %ebx
+
+ mull %eax
+
+ addl %ebx, -4(%edi,%ecx,8)
+ adcl %eax, (%edi,%ecx,8)
+ adcl $0, %edx
+
+ incl %ecx
+ jnz L(diag)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+
+ addl %edx, -4(%edi) C dst most significant limb
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/lshift.asm b/rts/gmp/mpn/x86/lshift.asm
new file mode 100644
index 0000000000..4735335cbe
--- /dev/null
+++ b/rts/gmp/mpn/x86/lshift.asm
@@ -0,0 +1,90 @@
+dnl x86 mpn_lshift -- mpn left shift.
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+deflit(`FRAME',12)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%edx
+ movl PARAM_SHIFT,%ecx
+
+ subl $4,%esi C adjust src
+
+ movl (%esi,%edx,4),%ebx C read most significant limb
+ xorl %eax,%eax
+ shldl( %cl, %ebx, %eax) C compute carry limb
+ decl %edx
+ jz L(end)
+ pushl %eax C push carry limb onto stack
+ testb $1,%dl
+ jnz L(1) C enter loop in the middle
+ movl %ebx,%eax
+
+ ALIGN(8)
+L(oop): movl (%esi,%edx,4),%ebx C load next lower limb
+ shldl( %cl, %ebx, %eax) C compute result limb
+ movl %eax,(%edi,%edx,4) C store it
+ decl %edx
+L(1): movl (%esi,%edx,4),%eax
+ shldl( %cl, %eax, %ebx)
+ movl %ebx,(%edi,%edx,4)
+ decl %edx
+ jnz L(oop)
+
+ shll %cl,%eax C compute least significant limb
+ movl %eax,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+L(end): shll %cl,%ebx C compute least significant limb
+ movl %ebx,(%edi) C store it
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/mod_1.asm b/rts/gmp/mpn/x86/mod_1.asm
new file mode 100644
index 0000000000..3908161b3e
--- /dev/null
+++ b/rts/gmp/mpn/x86/mod_1.asm
@@ -0,0 +1,141 @@
+dnl x86 mpn_mod_1 -- mpn by limb remainder.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl cycles/limb
+dnl K6 20
+dnl P5 44
+dnl P6 39
+dnl 486 approx 42 maybe
+dnl
+dnl The following have their own optimized mod_1 implementations, but for
+dnl reference the code here runs as follows.
+dnl
+dnl P6MMX 39
+dnl K7 41
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C Divide src,size by divisor and return the remainder. The quotient is
+C discarded.
+C
+C See mpn/x86/divrem_1.asm for some comments.
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ orl %ecx, %ecx
+
+ movl PARAM_CARRY, %edx
+ jnz LF(mpn_mod_1,top)
+
+ popl %esi
+ movl %edx, %eax
+
+ popl %ebx
+
+ ret
+
+EPILOGUE()
+
+
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ pushl %esi FRAME_pushl()
+
+ orl %ecx, %ecx
+ jz L(done_zero)
+
+ movl PARAM_DIVISOR, %esi
+ movl -4(%ebx,%ecx,4), %eax C src high limb
+
+ cmpl %esi, %eax
+
+ sbbl %edx, %edx C -1 if high<divisor
+
+ addl %edx, %ecx C skip one division if high<divisor
+ jz L(done_eax)
+
+ andl %eax, %edx C carry if high<divisor
+
+
+L(top):
+ C eax scratch (quotient)
+ C ebx src
+ C ecx counter
+ C edx carry (remainder)
+ C esi divisor
+ C edi
+ C ebp
+
+ movl -4(%ebx,%ecx,4), %eax
+
+ divl %esi
+
+ loop_or_decljnz L(top)
+
+
+ movl %edx, %eax
+L(done_eax):
+ popl %esi
+
+ popl %ebx
+
+ ret
+
+
+L(done_zero):
+ popl %esi
+ xorl %eax, %eax
+
+ popl %ebx
+
+ ret
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/mul_1.asm b/rts/gmp/mpn/x86/mul_1.asm
new file mode 100644
index 0000000000..8817f291bc
--- /dev/null
+++ b/rts/gmp/mpn/x86/mul_1.asm
@@ -0,0 +1,130 @@
+dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
+dnl with a limb and store the result in a second limb vector.
+dnl
+dnl cycles/limb
+dnl P6: 5.5
+dnl
+dnl The following CPUs have their own optimized code, but for reference the
+dnl code here runs as follows.
+dnl
+dnl cycles/limb
+dnl P5: 12.5
+dnl K6: 10.5
+dnl K7: 4.5
+
+
+dnl Copyright (C) 1992, 1994, 1997, 1998, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ecx
+
+ xorl %ebx,%ebx
+ andl $3,%ecx
+ jz L(end0)
+
+L(oop0):
+ movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ leal 4(%esi),%esi
+ addl %ebx,%eax
+ movl $0,%ebx
+ adcl %ebx,%edx
+ movl %eax,(%edi)
+ movl %edx,%ebx C propagate carry into cylimb
+
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oop0)
+
+L(end0):
+ movl PARAM_SIZE,%ecx
+ shrl $2,%ecx
+ jz L(end)
+
+
+ ALIGN(8)
+L(oop): movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ addl %eax,%ebx
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 4(%esi),%eax
+ mull PARAM_MULTIPLIER
+ movl %ebx,(%edi)
+ addl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl 8(%esi),%eax
+ mull PARAM_MULTIPLIER
+ movl %ebp,4(%edi)
+ addl %eax,%ebx C new lo + cylimb
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 12(%esi),%eax
+ mull PARAM_MULTIPLIER
+ movl %ebx,8(%edi)
+ addl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl %ebp,12(%edi)
+
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ecx
+ jnz L(oop)
+
+L(end): movl %ebx,%eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/mul_basecase.asm b/rts/gmp/mpn/x86/mul_basecase.asm
new file mode 100644
index 0000000000..3a9b73895b
--- /dev/null
+++ b/rts/gmp/mpn/x86/mul_basecase.asm
@@ -0,0 +1,209 @@
+dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
+dnl in a third limb vector.
+
+
+dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+C
+C This was written in a haste since the Pentium optimized code that was used
+C for all x86 machines was slow for the Pentium II. This code would benefit
+C from some cleanup.
+C
+C To shave off some percentage of the run-time, one should make 4 variants
+C of the Louter loop, for the four different outcomes of un mod 4. That
+C would avoid Loop0 altogether. Code expansion would be > 4-fold for that
+C part of the function, but since it is not very large, that would be
+C acceptable.
+C
+C The mul loop (at L(oopM)) might need some tweaking. It's current speed is
+C unknown.
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+defframe(VAR_MULTIPLIER, -4)
+defframe(VAR_COUNTER, -8)
+deflit(VAR_STACK_SPACE, 8)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+ subl $VAR_STACK_SPACE,%esp
+ pushl %esi
+ pushl %ebp
+ pushl %edi
+deflit(`FRAME',eval(VAR_STACK_SPACE+12))
+
+ movl PARAM_XP,%esi
+ movl PARAM_WP,%edi
+ movl PARAM_YP,%ebp
+
+ movl (%esi),%eax C load xp[0]
+ mull (%ebp) C multiply by yp[0]
+ movl %eax,(%edi) C store to wp[0]
+ movl PARAM_XSIZE,%ecx C xsize
+ decl %ecx C If xsize = 1, ysize = 1 too
+ jz L(done)
+
+ pushl %ebx
+FRAME_pushl()
+ movl %edx,%ebx
+
+ leal 4(%esi),%esi
+ leal 4(%edi),%edi
+
+L(oopM):
+ movl (%esi),%eax C load next limb at xp[j]
+ leal 4(%esi),%esi
+ mull (%ebp)
+ addl %ebx,%eax
+ movl %edx,%ebx
+ adcl $0,%ebx
+ movl %eax,(%edi)
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oopM)
+
+ movl %ebx,(%edi) C most significant limb of product
+ addl $4,%edi C increment wp
+ movl PARAM_XSIZE,%eax
+ shll $2,%eax
+ subl %eax,%edi
+ subl %eax,%esi
+
+ movl PARAM_YSIZE,%eax C ysize
+ decl %eax
+ jz L(skip)
+ movl %eax,VAR_COUNTER C set index i to ysize
+
+L(outer):
+ movl PARAM_YP,%ebp C yp
+ addl $4,%ebp C make ebp point to next v limb
+ movl %ebp,PARAM_YP
+ movl (%ebp),%eax C copy y limb ...
+ movl %eax,VAR_MULTIPLIER C ... to stack slot
+ movl PARAM_XSIZE,%ecx
+
+ xorl %ebx,%ebx
+ andl $3,%ecx
+ jz L(end0)
+
+L(oop0):
+ movl (%esi),%eax
+ mull VAR_MULTIPLIER
+ leal 4(%esi),%esi
+ addl %ebx,%eax
+ movl $0,%ebx
+ adcl %ebx,%edx
+ addl %eax,(%edi)
+ adcl %edx,%ebx C propagate carry into cylimb
+
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oop0)
+
+L(end0):
+ movl PARAM_XSIZE,%ecx
+ shrl $2,%ecx
+ jz L(endX)
+
+ ALIGN(8)
+L(oopX):
+ movl (%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %eax,%ebx
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 4(%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %ebx,(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl 8(%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %ebp,4(%edi)
+ adcl %eax,%ebx C new lo + cylimb
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 12(%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %ebx,8(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ addl %ebp,12(%edi)
+ adcl $0,%ebx C propagate carry into cylimb
+
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ecx
+ jnz L(oopX)
+
+L(endX):
+ movl %ebx,(%edi)
+ addl $4,%edi
+
+ C we incremented wp and xp in the loop above; compensate
+ movl PARAM_XSIZE,%eax
+ shll $2,%eax
+ subl %eax,%edi
+ subl %eax,%esi
+
+ movl VAR_COUNTER,%eax
+ decl %eax
+ movl %eax,VAR_COUNTER
+ jnz L(outer)
+
+L(skip):
+ popl %ebx
+ popl %edi
+ popl %ebp
+ popl %esi
+ addl $8,%esp
+ ret
+
+L(done):
+ movl %edx,4(%edi) C store to wp[1]
+ popl %edi
+ popl %ebp
+ popl %esi
+ addl $8,%esp
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/README b/rts/gmp/mpn/x86/p6/README
new file mode 100644
index 0000000000..7dbc905a0d
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/README
@@ -0,0 +1,95 @@
+
+ INTEL P6 MPN SUBROUTINES
+
+
+
+This directory contains code optimized for Intel P6 class CPUs, meaning
+PentiumPro, Pentium II and Pentium III. The mmx and p3mmx subdirectories
+have routines using MMX instructions.
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache, are as follows.
+Some of these might be able to be improved.
+
+ cycles/limb
+
+ mpn_add_n/sub_n 3.7
+
+ mpn_copyi 0.75
+ mpn_copyd 2.4
+
+ mpn_divrem_1 39.0
+ mpn_mod_1 39.0
+ mpn_divexact_by3 8.5
+
+ mpn_mul_1 5.5
+ mpn_addmul/submul_1 6.35
+
+ mpn_l/rshift 2.5
+
+ mpn_mul_basecase 8.2 cycles/crossproduct (approx)
+ mpn_sqr_basecase 4.0 cycles/crossproduct (approx)
+ or 7.75 cycles/triangleproduct (approx)
+
+Pentium II and III have MMX and get the following improvements.
+
+ mpn_divrem_1 25.0 integer part, 17.5 fractional part
+ mpn_mod_1 24.0
+
+ mpn_l/rshift 1.75
+
+
+
+
+NOTES
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+
+Mispredicted branches have a penalty of between 9 and 15 cycles, and even up
+to 26 cycles depending how far speculative execution has gone. The 9 cycle
+minimum penalty comes from the issue pipeline being 9 stages.
+
+A copy with rep movs seems to copy 16 bytes at a time, since speeds for 4,
+5, 6 or 7 limb operations are all the same. The 0.75 cycles/limb would be 3
+cycles per 16 byte block.
+
+
+
+
+CODING
+
+Instructions in general code have been shown grouped if they can execute
+together, which means up to three instructions with no successive
+dependencies, and with only the first being a multiple micro-op.
+
+P6 has out-of-order execution, so the groupings are really only showing
+dependent paths where some shuffling might allow some latencies to be
+hidden.
+
+
+
+
+REFERENCES
+
+"Intel Architecture Optimization Reference Manual", 1999, revision 001 dated
+02/99, order number 245127 (order number 730795-001 is in the document too).
+Available on-line:
+
+ http://download.intel.com/design/PentiumII/manuals/245127.htm
+
+"Intel Architecture Optimization Manual", 1997, order number 242816. This
+is an older document mostly about P5 and not as good as the above.
+Available on-line:
+
+ http://download.intel.com/design/PentiumII/manuals/242816.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/p6/aorsmul_1.asm b/rts/gmp/mpn/x86/p6/aorsmul_1.asm
new file mode 100644
index 0000000000..feb364ec0b
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/aorsmul_1.asm
@@ -0,0 +1,300 @@
+dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl
+dnl P6: 6.35 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl P6 UNROLL_COUNT cycles/limb
+dnl 8 6.7
+dnl 16 6.35
+dnl 32 6.3
+dnl 64 6.3
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1', `
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+ define(M4_function_1c, mpn_addmul_1c)
+ define(M4_description, add it to)
+ define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1', `
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+ define(M4_function_1c, mpn_submul_1c)
+ define(M4_description, subtract it from)
+ define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+C
+C This code is pretty much the same as the K6 code. The unrolled loop is
+C the same, but there's just a few scheduling tweaks in the setups and the
+C simple loop.
+C
+C A number of variations have been tried for the unrolled loop, with one or
+C two carries, and with loads scheduled earlier, but nothing faster than 6
+C cycles/limb has been found.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function_1c)
+ pushl %ebx
+deflit(`FRAME',4)
+ movl PARAM_CARRY, %ebx
+ jmp LF(M4_function_1,start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_function_1)
+ push %ebx
+deflit(`FRAME',4)
+ xorl %ebx, %ebx C initial carry
+
+L(start_nc):
+ movl PARAM_SIZE, %ecx
+ pushl %esi
+deflit(`FRAME',8)
+
+ movl PARAM_SRC, %esi
+ pushl %edi
+deflit(`FRAME',12)
+
+ movl PARAM_DST, %edi
+ pushl %ebp
+deflit(`FRAME',16)
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_MULTIPLIER, %ebp
+ jae L(unroll)
+
+
+ C simple loop
+ C this is offset 0x22, so close enough to aligned
+L(simple):
+ C eax scratch
+ C ebx carry
+ C ecx counter
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ movl (%esi), %eax
+ addl $4, %edi
+
+ mull %ebp
+
+ addl %ebx, %eax
+ adcl $0, %edx
+
+ M4_inst %eax, -4(%edi)
+ movl %edx, %ebx
+
+ adcl $0, %ebx
+ decl %ecx
+
+ leal 4(%esi), %esi
+ jnz L(simple)
+
+
+ popl %ebp
+ popl %edi
+
+ popl %esi
+ movl %ebx, %eax
+
+ popl %ebx
+ ret
+
+
+
+C------------------------------------------------------------------------------
+C VAR_JUMP holds the computed jump temporarily because there's not enough
+C registers when doing the mul for the initial two carry limbs.
+C
+C The add/adc for the initial carry in %ebx is necessary only for the
+C mpn_add/submul_1c entry points. Duplicating the startup code to
+C eliminiate this for the plain mpn_add/submul_1 doesn't seem like a good
+C idea.
+
+dnl overlapping with parameters already fetched
+define(VAR_COUNTER,`PARAM_SIZE')
+define(VAR_JUMP, `PARAM_DST')
+
+ C this is offset 0x43, so close enough to aligned
+L(unroll):
+ C eax
+ C ebx initial carry
+ C ecx size
+ C edx
+ C esi src
+ C edi dst
+ C ebp
+
+ movl %ecx, %edx
+ decl %ecx
+
+ subl $2, %edx
+ negl %ecx
+
+ shrl $UNROLL_LOG2, %edx
+ andl $UNROLL_MASK, %ecx
+
+ movl %edx, VAR_COUNTER
+ movl %ecx, %edx
+
+ C 15 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ shll $4, %edx
+ negl %ecx
+
+ leal L(entry) (%edx,%ecx,1), %edx
+')
+ movl (%esi), %eax C src low limb
+
+ movl %edx, VAR_JUMP
+ leal ifelse(UNROLL_BYTES,256,128+) 4(%esi,%ecx,4), %esi
+
+ mull %ebp
+
+ addl %ebx, %eax C initial carry (from _1c)
+ adcl $0, %edx
+
+ movl %edx, %ebx C high carry
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%ecx,4), %edi
+
+ movl VAR_JUMP, %edx
+ testl $1, %ecx
+ movl %eax, %ecx C low carry
+
+ cmovnz( %ebx, %ecx) C high,low carry other way around
+ cmovnz( %eax, %ebx)
+
+ jmp *%edx
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ shll $4, %edx
+ negl %ecx
+
+ C See README.family about old gas bugs
+ leal (%edx,%ecx,1), %edx
+ addl $L(entry)-L(here), %edx
+
+ addl (%esp), %edx
+
+ ret
+')
+
+
+C -----------------------------------------------------------
+ ALIGN(32)
+L(top):
+deflit(`FRAME',16)
+ C eax scratch
+ C ebx carry hi
+ C ecx carry lo
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+ C
+ C VAR_COUNTER loop counter
+ C
+ C 15 code bytes per limb
+
+ addl $UNROLL_BYTES, %edi
+
+L(entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*4*CHUNK_COUNT ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%esi), %eax)
+ mull %ebp
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+ adcl %eax, %ebx
+ movl %edx, %ecx
+ adcl $0, %ecx
+
+ movl disp1(%esi), %eax
+ mull %ebp
+ M4_inst %ebx, disp1(%edi)
+ adcl %eax, %ecx
+ movl %edx, %ebx
+ adcl $0, %ebx
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%esi), %esi
+
+ jns L(top)
+
+
+deflit(`disp0', eval(UNROLL_BYTES ifelse(UNROLL_BYTES,256,-128)))
+
+ M4_inst %ecx, disp0(%edi)
+ movl %ebx, %eax
+
+ popl %ebp
+ popl %edi
+
+ popl %esi
+ popl %ebx
+ adcl $0, %eax
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/diveby3.asm b/rts/gmp/mpn/x86/p6/diveby3.asm
new file mode 100644
index 0000000000..a77703ea89
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/diveby3.asm
@@ -0,0 +1,37 @@
+dnl Intel P6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl P6: 8.5 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl The P5 code runs well on P6, in fact better than anything else found so
+dnl far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the
+dnl dependent path are taking 4.5 cycles.
+dnl
+dnl The destination cache line prefetching is unnecessary on P6, but
+dnl removing it is a 2 cycle slowdown (approx), so it must be inducing
+dnl something good in the out of order execution.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_divexact_by3c)
+include_mpn(`x86/pentium/diveby3.asm')
diff --git a/rts/gmp/mpn/x86/p6/gmp-mparam.h b/rts/gmp/mpn/x86/p6/gmp-mparam.h
new file mode 100644
index 0000000000..d7bfb6d60c
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/gmp-mparam.h
@@ -0,0 +1,96 @@
+/* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 5 /* cycles */
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 39 /* cycles */
+#endif
+
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 2 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 23
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 139
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 52
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 166
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 116
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 66
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 20
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 54
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 592, 1440, 2688, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 608
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 5888
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 656, 1504, 2944, 6656, 18432, 57344, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 672
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 5888
+#endif
diff --git a/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm b/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm
new file mode 100644
index 0000000000..f1b011b623
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm
@@ -0,0 +1,677 @@
+dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division.
+dnl
+dnl P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor, mp_limb_t carry);
+C
+C This code is a lightly reworked version of mpn/x86/k7/mmx/divrem_1.asm,
+C see that file for some comments. It's likely what's here can be improved.
+
+
+dnl MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl inverse method is used, rather than plain "divl"s. Minimum value 1.
+dnl
+dnl The different speeds of the integer and fraction parts means that using
+dnl xsize+size isn't quite right. The threshold wants to be a bit higher
+dnl for the integer part and a bit lower for the fraction part. (Or what's
+dnl really wanted is to speed up the integer part!)
+dnl
+dnl The threshold is set to make the integer part right. At 4 limbs the
+dnl div and mul are about the same there, but on the fractional part the
+dnl mul is much faster.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_CARRY, 24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC, 12)
+defframe(PARAM_XSIZE, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC, -28)
+defframe(VAR_DST, -32)
+defframe(VAR_DST_STOP,-36)
+
+deflit(STACK_SPACE, 36)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ leal -4(%edi,%ebx,4), %edi
+ jmp LF(mpn_divrem_1,start_1c)
+
+EPILOGUE()
+
+
+ C offset 0x31, close enough to aligned
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ orl %ecx, %ecx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ leal -4(%edi,%ebx,4), %edi C &dst[xsize-1]
+ jz L(no_skip_div)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+ cmpl %ebp, %eax C one less div if high<divisor
+ jnb L(no_skip_div)
+
+ movl $0, (%edi,%ecx,4) C dst high limb
+ decl %ecx C size-1
+ movl %eax, %edx C src high limb as initial carry
+L(no_skip_div):
+
+
+L(start_1c):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ leal (%ebx,%ecx), %eax C size+xsize
+ cmpl $MUL_THRESHOLD, %eax
+ jae L(mul_by_inverse)
+
+ orl %ecx, %ecx
+ jz L(divide_no_integer)
+
+L(divide_integer):
+ C eax scratch (quotient)
+ C ebx xsize
+ C ecx counter
+ C edx scratch (remainder)
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl %ebp
+
+ movl %eax, (%edi,%ecx,4)
+ decl %ecx
+ jnz L(divide_integer)
+
+
+L(divide_no_integer):
+ movl PARAM_DST, %edi
+ orl %ebx, %ebx
+ jnz L(divide_fraction)
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBX, %ebx
+ movl %edx, %eax
+
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+L(divide_fraction):
+ C eax scratch (quotient)
+ C ebx counter
+ C ecx
+ C edx scratch (remainder)
+ C esi
+ C edi dst
+ C ebp divisor
+
+ movl $0, %eax
+
+ divl %ebp
+
+ movl %eax, -4(%edi,%ebx,4)
+ decl %ebx
+ jnz L(divide_fraction)
+
+ jmp L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ leal 12(%edi), %ebx
+
+ movl %ebx, VAR_DST_STOP
+ leal 4(%edi,%ecx,4), %edi C &dst[xsize+size]
+
+ movl %edi, VAR_DST
+ movl %ecx, %ebx C size
+
+ bsrl %ebp, %ecx C 31-l
+ movl %edx, %edi C carry
+
+ leal 1(%ecx), %eax C 32-l
+ xorl $31, %ecx C l
+
+ movl %ecx, VAR_NORM
+ movl $-1, %edx
+
+ shll %cl, %ebp C d normalized
+ movd %eax, %mm7
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 giving edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ movl %eax, VAR_INVERSE
+ orl %ebx, %ebx C size
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl %eax, VAR_SRC
+ jz L(start_zero)
+
+ movl 8(%eax), %esi C src high limb
+ cmpl $1, %ebx
+ jz L(start_one)
+
+L(start_two_or_more):
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ cmpl $2, %ebx
+ je L(integer_two_left)
+ jmp L(integer_top)
+
+
+L(start_one):
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shll %cl, %esi C n10 = high << l
+ jmp L(integer_one_left)
+
+
+L(start_zero):
+ shll %cl, %edi C n2 = carry << l
+ movl $0, %esi C n10 = 0
+
+ C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then
+ C must have xsize!=0
+ jmp L(fraction_some)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C This loop runs at about 25 cycles, which is probably sub-optimal, and
+C certainly more than the dependent chain would suggest. A better loop, or
+C a better rough analysis of what's possible, would be welcomed.
+C
+C In the current implementation, the following successively dependent
+C micro-ops seem to exist.
+C
+C uops
+C n2+n1 1 (addl)
+C mul 5
+C q1+1 3 (addl/adcl)
+C mul 5
+C sub 3 (subl/sbbl)
+C addback 2 (cmov)
+C ---
+C 19
+C
+C Lack of registers hinders explicit scheduling and it might be that the
+C normal out of order execution isn't able to hide enough under the mul
+C latencies.
+C
+C Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than
+C cmov (and takes one uop off the dependent chain). A sarl/andl/addl
+C combination was tried for the addback (despite the fact it would lengthen
+C the dependent chain) but found to be no faster.
+
+
+ ALIGN(16)
+L(integer_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp d
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+ movl VAR_SRC, %ecx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+ movq (%ecx), %mm0 C next src limb and the one below it
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ subl $4, %ecx
+
+ movl %ecx, VAR_SRC
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ movl %ebp, %eax C d
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+
+ mull %ebx C (q1+1)*d
+
+ movl VAR_DST, %ecx
+ psrlq %mm7, %mm0
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+ movl VAR_DST_STOP, %eax
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+
+ sbbl $0, %ebx C q
+ subl $4, %ecx
+
+ movl %ebx, (%ecx)
+ cmpl %eax, %ecx
+
+ movl %ecx, VAR_DST
+ jne L(integer_top)
+
+
+L(integer_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C
+C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
+C q1_ff special case. This make the code a bit smaller and simpler, and
+C costs only 2 cycles (each).
+
+L(integer_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+ movl PARAM_SRC, %ecx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd (%ecx), %mm0 C src low limb
+
+ movl VAR_DST_STOP, %ecx
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+
+ sbbl $0, %ebx C q
+
+ movl %ebx, -4(%ecx)
+
+
+C -----------------------------------------------------------------------------
+L(integer_one_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+ movl VAR_DST_STOP, %ecx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx C q1 if q1+1 overflowed
+
+ mull %ebx
+
+ C
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+ movl PARAM_XSIZE, %eax
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+
+ sbbl $0, %ebx C q
+
+ movl %ebx, -8(%ecx)
+ subl $8, %ecx
+
+
+
+ orl %eax, %eax C xsize
+ jnz L(fraction_some)
+
+ movl %edi, %eax
+L(fraction_done):
+ movl VAR_NORM, %ecx
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EDI, %edi
+
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBX, %ebx
+ addl $STACK_SPACE, %esp
+
+ shrl %cl, %eax
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx
+ C edx
+ C esi n10
+ C edi n2
+ C ebp divisor
+
+ movl VAR_DST, %ecx
+ movl VAR_DST_STOP, %edx
+ subl $4, %ecx
+
+ movl %ecx, VAR_DST
+ psrlq %mm7, %mm0
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+
+ movl $-1, (%ecx)
+ movd %mm0, %esi C next n10
+
+ cmpl %ecx, %edx
+ jne L(integer_top)
+
+ jmp L(integer_loop_done)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C In the current implementation, the following successively dependent
+C micro-ops seem to exist.
+C
+C uops
+C mul 5
+C q1+1 1 (addl)
+C mul 5
+C sub 3 (negl/sbbl)
+C addback 2 (cmov)
+C ---
+C 16
+C
+C The loop in fact runs at about 17.5 cycles. Using a sarl/andl/addl for
+C the addback was found to be a touch slower.
+
+
+ ALIGN(16)
+L(fraction_some):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi carry
+ C ebp divisor
+
+ movl PARAM_DST, %esi
+ movl VAR_DST_STOP, %ecx
+ movl %edi, %eax
+
+ subl $8, %ecx
+
+
+ ALIGN(16)
+L(fraction_top):
+ C eax n2, then scratch
+ C ebx scratch (nadj, q1)
+ C ecx dst, decrementing
+ C edx scratch
+ C esi dst stop point
+ C edi n2
+ C ebp divisor
+
+ mull VAR_INVERSE C m*n2
+
+ movl %ebp, %eax C d
+ subl $4, %ecx C dst
+ leal 1(%edi), %ebx
+
+ C
+
+ C
+
+ C
+
+ addl %edx, %ebx C 1 + high(n2<<32 + m*n2) = q1+1
+
+ mull %ebx C (q1+1)*d
+
+ C
+
+ C
+
+ C
+
+ C
+
+ negl %eax C low of n - (q1+1)*d
+
+ sbbl %edx, %edi C high of n - (q1+1)*d, caring only about carry
+ leal (%ebp,%eax), %edx
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+
+ sbbl $0, %ebx C q
+ movl %eax, %edi C remainder->n2
+ cmpl %esi, %ecx
+
+ movl %ebx, (%ecx) C previous q
+ jne L(fraction_top)
+
+
+ jmp L(fraction_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/mmx/mod_1.asm b/rts/gmp/mpn/x86/p6/mmx/mod_1.asm
new file mode 100644
index 0000000000..e7d8d94d33
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/mmx/mod_1.asm
@@ -0,0 +1,444 @@
+dnl Intel Pentium-II mpn_mod_1 -- mpn by limb remainder.
+dnl
+dnl P6MMX: 24.0 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C The code here very similar to mpn_divrem_1, but with the quotient
+C discarded. What's here probably isn't optimal.
+C
+C See mpn/x86/p6/mmx/divrem_1.c and mpn/x86/k7/mmx/mod_1.asm for some
+C comments.
+
+
+dnl MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC_STOP,-28)
+
+deflit(STACK_SPACE, 28)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ jmp LF(mpn_mod_1,start_1c)
+
+EPILOGUE()
+
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl $0, %edx C initial carry (if can't skip a div)
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C carry flag if high<divisor
+
+ cmovc( %eax, %edx) C src high limb as initial carry
+ sbbl $0, %ecx C size-1 to skip one div
+ jz L(divide_done)
+
+
+ ALIGN(16)
+L(start_1c):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ cmpl $MUL_THRESHOLD, %ecx
+ jae L(mul_by_inverse)
+
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl %ebp
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl %edx, %eax
+
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ movl %ebx, SAVE_EBX
+ leal -4(%esi), %ebx
+
+ movl %ebx, VAR_SRC_STOP
+ movl %ecx, %ebx C size
+
+ movl %edi, SAVE_EDI
+ movl %edx, %edi C carry
+
+ bsrl %ebp, %ecx C 31-l
+ movl $-1, %edx
+
+ leal 1(%ecx), %eax C 32-l
+ xorl $31, %ecx C l
+
+ movl %ecx, VAR_NORM
+ shll %cl, %ebp C d normalized
+
+ movd %eax, %mm7
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ C
+
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl 8(%eax), %esi C src high limb
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ movl %eax, %ecx C &src[size-3]
+
+
+ifelse(MUL_THRESHOLD,2,`
+ cmpl $2, %ebx
+ je L(inverse_two_left)
+')
+
+
+C The dependent chain here is the same as in mpn_divrem_1, but a few
+C instructions are saved by not needing to store the quotient limbs. This
+C gets it down to 24 c/l, which is still a bit away from a theoretical 19
+C c/l.
+
+ ALIGN(16)
+L(inverse_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx src pointer, decrementing
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next src limb and the one below it
+ subl $4, %ecx
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+ movl VAR_SRC_STOP, %ebx
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+ cmpl %ebx, %ecx
+
+ jne L(inverse_top)
+
+
+L(inverse_loop_done):
+
+
+C -----------------------------------------------------------------------------
+
+L(inverse_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx &src[-1]
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src dword)
+ C mm7 rshift
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd 4(%ecx), %mm0 C src low limb
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+ movl %ebp, %eax C d
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+
+
+C One limb left
+
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movl VAR_NORM, %ecx C for final denorm
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+ movl %ebp, %eax C d
+
+ mull %ebx C (q1+1)*d
+
+ movl SAVE_EBX, %ebx
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ leal (%ebp,%esi), %edx
+ movl SAVE_EBP, %ebp
+
+ movl %esi, %eax C remainder
+ movl SAVE_ESI, %esi
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ movl SAVE_EDI, %edi
+
+ shrl %cl, %eax C denorm remainder
+ addl $STACK_SPACE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx src pointer
+ C edx
+ C esi n10
+ C edi (n2)
+ C ebp divisor
+
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ movl VAR_SRC_STOP, %edx
+ psrlq %mm7, %mm0
+
+ movd %mm0, %esi C next n10
+ cmpl %ecx, %edx
+ jne L(inverse_top)
+
+ jmp L(inverse_loop_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/mmx/popham.asm b/rts/gmp/mpn/x86/p6/mmx/popham.asm
new file mode 100644
index 0000000000..50f9a11218
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/mmx/popham.asm
@@ -0,0 +1,31 @@
+dnl Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and
+dnl hamming distance.
+dnl
+dnl P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb
+dnl (approx)
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/rts/gmp/mpn/x86/p6/p3mmx/popham.asm b/rts/gmp/mpn/x86/p6/p3mmx/popham.asm
new file mode 100644
index 0000000000..e63fbf334b
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/p3mmx/popham.asm
@@ -0,0 +1,30 @@
+dnl Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and
+dnl hamming distance.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Haven't actually measured it, but the K7 code with the psadbw should be
+dnl good on P-III.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/rts/gmp/mpn/x86/p6/sqr_basecase.asm b/rts/gmp/mpn/x86/p6/sqr_basecase.asm
new file mode 100644
index 0000000000..174c78406a
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/sqr_basecase.asm
@@ -0,0 +1,641 @@
+dnl Intel P6 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular
+dnl product (measured on the speed difference between 20 and 40 limbs,
+dnl which is the Karatsuba recursing range).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for
+dnl a description. The only difference here is that UNROLL_COUNT can go up
+dnl to 64 (not 63) making KARATSUBA_SQR_THRESHOLD_MAX 67.
+
+deflit(KARATSUBA_SQR_THRESHOLD_MAX, 67)
+
+ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',
+`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')
+
+m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')
+deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the given size
+C is small.
+C
+C The code size might look a bit excessive, but not all of it is executed so
+C it won't all get into the code cache. The 1x1, 2x2 and 3x3 special cases
+C clearly apply only to those sizes; mid sizes like 10x10 only need part of
+C the unrolled addmul; and big sizes like 40x40 that do use the full
+C unrolling will least be making good use of it, because 40x40 will take
+C something like 7000 cycles.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %edx
+
+ movl PARAM_SRC, %eax
+
+ cmpl $2, %edx
+ movl PARAM_DST, %ecx
+ je L(two_limbs)
+
+ movl (%eax), %eax
+ ja L(three_or_more)
+
+
+C -----------------------------------------------------------------------------
+C one limb only
+ C eax src limb
+ C ebx
+ C ecx dst
+ C edx
+
+ mull %eax
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx dst
+ C edx
+
+defframe(SAVE_ESI, -4)
+defframe(SAVE_EBX, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(`STACK_SPACE',16)
+
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl %eax, %esi
+ movl (%eax), %eax
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx) C dst[0]
+ movl 4(%esi), %eax
+
+ movl %ebx, SAVE_EBX
+ movl %edx, %ebx C dst[1]
+
+ mull %eax C src[1]^2
+
+ movl %edi, SAVE_EDI
+ movl %eax, %edi C dst[2]
+ movl (%esi), %eax
+
+ movl %ebp, SAVE_EBP
+ movl %edx, %ebp C dst[3]
+
+ mull 4(%esi) C src[0]*src[1]
+
+ addl %eax, %ebx
+ movl SAVE_ESI, %esi
+
+ adcl %edx, %edi
+
+ adcl $0, %ebp
+ addl %ebx, %eax
+ movl SAVE_EBX, %ebx
+
+ adcl %edi, %edx
+ movl SAVE_EDI, %edi
+
+ adcl $0, %ebp
+
+ movl %eax, 4(%ecx)
+
+ movl %ebp, 12(%ecx)
+ movl SAVE_EBP, %ebp
+
+ movl %edx, 8(%ecx)
+ addl $FRAME, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(three_or_more):
+ C eax src low limb
+ C ebx
+ C ecx dst
+ C edx size
+deflit(`FRAME',0)
+
+ pushl %esi defframe_pushl(`SAVE_ESI')
+ cmpl $4, %edx
+
+ movl PARAM_SRC, %esi
+ jae L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+
+ C eax src low limb
+ C ebx
+ C ecx dst
+ C edx
+ C esi src
+ C edi
+ C ebp
+
+ pushl %ebp defframe_pushl(`SAVE_EBP')
+ pushl %edi defframe_pushl(`SAVE_EDI')
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ movl 4(%esi), %eax
+ xorl %ebp, %ebp
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl %edx, 12(%ecx)
+ movl 8(%esi), %eax
+
+ pushl %ebx defframe_pushl(`SAVE_EBX')
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl %edx, 20(%ecx)
+
+ movl (%esi), %eax
+
+ mull 4(%esi) C src[0] * src[1]
+
+ movl %eax, %ebx
+ movl %edx, %edi
+
+ movl (%esi), %eax
+
+ mull 8(%esi) C src[0] * src[2]
+
+ addl %eax, %edi
+ movl %edx, %ebp
+
+ adcl $0, %ebp
+ movl 4(%esi), %eax
+
+ mull 8(%esi) C src[1] * src[2]
+
+ xorl %esi, %esi
+ addl %eax, %ebp
+
+ C eax
+ C ebx dst[1]
+ C ecx dst
+ C edx dst[4]
+ C esi zero, will be dst[5]
+ C edi dst[2]
+ C ebp dst[3]
+
+ adcl $0, %edx
+ addl %ebx, %ebx
+
+ adcl %edi, %edi
+
+ adcl %ebp, %ebp
+
+ adcl %edx, %edx
+ movl 4(%ecx), %eax
+
+ adcl $0, %esi
+ addl %ebx, %eax
+
+ movl %eax, 4(%ecx)
+ movl 8(%ecx), %eax
+
+ adcl %edi, %eax
+ movl 12(%ecx), %ebx
+
+ adcl %ebp, %ebx
+ movl 16(%ecx), %edi
+
+ movl %eax, 8(%ecx)
+ movl SAVE_EBP, %ebp
+
+ movl %ebx, 12(%ecx)
+ movl SAVE_EBX, %ebx
+
+ adcl %edx, %edi
+ movl 20(%ecx), %eax
+
+ movl %edi, 16(%ecx)
+ movl SAVE_EDI, %edi
+
+ adcl %esi, %eax C no carry out of this
+ movl SAVE_ESI, %esi
+
+ movl %eax, 20(%ecx)
+ addl $FRAME, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+defframe(VAR_COUNTER,-20)
+defframe(VAR_JMP, -24)
+deflit(`STACK_SPACE',24)
+
+L(four_or_more):
+ C eax src low limb
+ C ebx
+ C ecx
+ C edx size
+ C esi src
+ C edi
+ C ebp
+deflit(`FRAME',4) dnl %esi already pushed
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+ subl $STACK_SPACE-FRAME, %esp
+deflit(`FRAME',STACK_SPACE)
+ movl $1, %ecx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebx, SAVE_EBX
+ subl %edx, %ecx C -(size-1)
+
+ movl %ebp, SAVE_EBP
+ movl $0, %ebx C initial carry
+
+ leal (%esi,%edx,4), %esi C &src[size]
+ movl %eax, %ebp C multiplier
+
+ leal -4(%edi,%edx,4), %edi C &dst[size-1]
+
+
+C This loop runs at just over 6 c/l.
+
+L(mul_1):
+ C eax scratch
+ C ebx carry
+ C ecx counter, limbs, negative, -(size-1) to -1
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size-1]
+ C ebp multiplier
+
+ movl %ebp, %eax
+
+ mull (%esi,%ecx,4)
+
+ addl %ebx, %eax
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ movl %eax, 4(%edi,%ecx,4)
+
+ incl %ecx
+ jnz L(mul_1)
+
+
+ movl %ebx, 4(%edi)
+
+
+C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two addmuls, which are the bottom right corner of the product
+C triangle, are left to the end. These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1]. If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as mpn_addmul_1(), see that routine for some
+C comments.
+C
+C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+
+dnl This is also hard-coded in the address calculation below.
+deflit(CODE_BYTES_PER_LIMB, 15)
+
+dnl With &src[size] and &dst[size-1] pointers, the displacements in the
+dnl unrolled code fit in a byte for UNROLL_COUNT values up to 32, but above
+dnl that an offset must be added to them.
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>32),1,
+eval((UNROLL_COUNT-32)*4),
+0))
+
+ C eax
+ C ebx carry
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[size-1]
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+
+ subl $4, %ecx
+ jz L(corner)
+
+ movl %ecx, %edx
+ negl %ecx
+
+ shll $4, %ecx
+ifelse(OFFSET,0,,`subl $OFFSET, %esi')
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+ negl %edx
+
+ifelse(OFFSET,0,,`subl $OFFSET, %edi')
+
+ C The calculated jump mustn't be before the start of the available
+ C code. This is the limit that UNROLL_COUNT puts on the src operand
+ C size, but checked here using the jump address directly.
+
+ ASSERT(ae,
+ `movl_text_address( L(unroll_inner_start), %eax)
+ cmpl %eax, %ecx')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll_outer_top):
+ C eax
+ C ebx high limb to store
+ C ecx VAR_JMP
+ C edx VAR_COUNTER, limbs, negative
+ C esi &src[size], constant
+ C edi dst ptr, second highest limb of last addmul
+ C ebp
+
+ movl -12+OFFSET(%esi,%edx,4), %ebp C multiplier
+ movl %edx, VAR_COUNTER
+
+ movl -8+OFFSET(%esi,%edx,4), %eax C first limb of multiplicand
+
+ mull %ebp
+
+define(cmovX,`ifelse(eval(UNROLL_COUNT%2),1,`cmovz($@)',`cmovnz($@)')')
+
+ testb $1, %cl
+
+ movl %edx, %ebx C high carry
+ leal 4(%edi), %edi
+
+ movl %ecx, %edx C jump
+
+ movl %eax, %ecx C low carry
+ leal CODE_BYTES_PER_LIMB(%edx), %edx
+
+ cmovX( %ebx, %ecx) C high carry reverse
+ cmovX( %eax, %ebx) C low carry reverse
+ movl %edx, VAR_JMP
+ jmp *%edx
+
+
+ C Must be on an even address here so the low bit of the jump address
+ C will indicate which way around ecx/ebx should start.
+
+ ALIGN(2)
+
+L(unroll_inner_start):
+ C eax scratch
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src pointer
+ C edi dst pointer
+ C ebp multiplier
+ C
+ C 15 code bytes each limb
+ C ecx/ebx reversed on each chunk
+
+forloop(`i', UNROLL_COUNT, 1, `
+ deflit(`disp_src', eval(-i*4 + OFFSET))
+ deflit(`disp_dst', eval(disp_src))
+
+ m4_assert(`disp_src>=-128 && disp_src<128')
+ m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl, disp_src,(%esi), %eax)
+ mull %ebp
+Zdisp( addl, %ebx, disp_dst,(%edi))
+ adcl %eax, %ecx
+ movl %edx, %ebx
+ adcl $0, %ebx
+',`
+ dnl this one comes out last
+Zdisp( movl, disp_src,(%esi), %eax)
+ mull %ebp
+Zdisp( addl, %ecx, disp_dst,(%edi))
+ adcl %eax, %ebx
+ movl %edx, %ecx
+ adcl $0, %ecx
+')
+')
+L(unroll_inner_end):
+
+ addl %ebx, m4_empty_if_zero(OFFSET)(%edi)
+
+ movl VAR_COUNTER, %edx
+ adcl $0, %ecx
+
+ movl %ecx, m4_empty_if_zero(OFFSET+4)(%edi)
+ movl VAR_JMP, %ecx
+
+ incl %edx
+ jnz L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+ addl $OFFSET, %esi
+ addl $OFFSET, %edi
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(corner):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[2*size-5]
+ C ebp
+
+ movl -12(%esi), %eax
+
+ mull -8(%esi)
+
+ addl %eax, (%edi)
+ movl -12(%esi), %eax
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+
+ mull -4(%esi)
+
+ addl %eax, %ebx
+ movl -8(%esi), %eax
+
+ adcl $0, %edx
+
+ addl %ebx, 4(%edi)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+
+ mull -4(%esi)
+
+ movl PARAM_SIZE, %ecx
+ addl %ebx, %eax
+
+ adcl $0, %edx
+
+ movl %eax, 8(%edi)
+
+ movl %edx, 12(%edi)
+ movl PARAM_DST, %edi
+
+
+C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1].
+
+ subl $1, %ecx C size-1
+ xorl %eax, %eax C ready for final adcl, and clear carry
+
+ movl %ecx, %edx
+ movl PARAM_SRC, %esi
+
+
+L(lshift):
+ C eax
+ C ebx
+ C ecx counter, size-1 to 1
+ C edx size-1 (for later use)
+ C esi src (for later use)
+ C edi dst, incrementing
+ C ebp
+
+ rcll 4(%edi)
+ rcll 8(%edi)
+
+ leal 8(%edi), %edi
+ decl %ecx
+ jnz L(lshift)
+
+
+ adcl %eax, %eax
+
+ movl %eax, 4(%edi) C dst most significant limb
+ movl (%esi), %eax C src[0]
+
+ leal 4(%esi,%edx,4), %esi C &src[size]
+ subl %edx, %ecx C -(size-1)
+
+
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+
+ mull %eax
+
+ movl %eax, (%edi,%ecx,8) C dst[0]
+
+
+L(diag):
+ C eax scratch
+ C ebx scratch
+ C ecx counter, negative
+ C edx carry
+ C esi &src[size]
+ C edi dst[2*size-2]
+ C ebp
+
+ movl (%esi,%ecx,4), %eax
+ movl %edx, %ebx
+
+ mull %eax
+
+ addl %ebx, 4(%edi,%ecx,8)
+ adcl %eax, 8(%edi,%ecx,8)
+ adcl $0, %edx
+
+ incl %ecx
+ jnz L(diag)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+
+ addl %edx, 4(%edi) C dst most significant limb
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ifdef(`PIC',`
+L(pic_calc):
+ addl (%esp), %ecx
+ addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
+ addl %edx, %ecx
+ ret
+')
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/README b/rts/gmp/mpn/x86/pentium/README
new file mode 100644
index 0000000000..3b9ec8ac6f
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/README
@@ -0,0 +1,77 @@
+
+ INTEL PENTIUM P5 MPN SUBROUTINES
+
+
+This directory contains mpn functions optimized for Intel Pentium (P5,P54)
+processors. The mmx subdirectory has code for Pentium with MMX (P55).
+
+
+STATUS
+
+ cycles/limb
+
+ mpn_add_n/sub_n 2.375
+
+ mpn_copyi/copyd 1.0
+
+ mpn_divrem_1 44.0
+ mpn_mod_1 44.0
+ mpn_divexact_by3 15.0
+
+ mpn_l/rshift 5.375 normal (6.0 on P54)
+ 1.875 special shift by 1 bit
+
+ mpn_mul_1 13.0
+ mpn_add/submul_1 14.0
+
+ mpn_mul_basecase 14.2 cycles/crossproduct (approx)
+
+ mpn_sqr_basecase 8 cycles/crossproduct (approx)
+ or 15.5 cycles/triangleproduct (approx)
+
+Pentium MMX gets the following improvements
+
+ mpn_l/rshift 1.75
+
+
+1. mpn_lshift and mpn_rshift run at about 6 cycles/limb on P5 and P54, but the
+documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
+or 5 cycles/limb asymptotically. The P55 runs them at the expected speed.
+
+2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop
+overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
+
+3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
+should. Intel documentation says a mul instruction is 10 cycles, but it
+measures 9 and the routines using it run with it as 9.
+
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. Pentium doesn't allocate cache lines on writes, unlike most other modern
+processors. Since the functions in the mpn class do array writes, we have to
+handle allocating the destination cache lines by reading a word from it in the
+loops, to achieve the best performance.
+
+2. Pairing of memory operations requires that the two issued operations refer
+to different cache banks. The simplest way to insure this is to read/write
+two words from the same object. If we make operations on different objects,
+they might or might not be to the same cache bank.
+
+
+
+REFERENCES
+
+"Intel Architecture Optimization Manual", 1997, order number 242816. This
+is mostly about P5, the parts about P6 aren't relevant. Available on-line:
+
+ http://download.intel.com/design/PentiumII/manuals/242816.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/pentium/aors_n.asm b/rts/gmp/mpn/x86/pentium/aors_n.asm
new file mode 100644
index 0000000000..a61082a456
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/aors_n.asm
@@ -0,0 +1,196 @@
+dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+dnl
+dnl P5: 2.375 cycles/limb
+
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_add_n',`
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(M4_function_nc)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC1,%esi
+ movl PARAM_SRC2,%ebp
+ movl PARAM_SIZE,%ecx
+
+ movl (%ebp),%ebx
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx C zero carry flag
+ jz L(endgo)
+
+ pushl %edx
+FRAME_pushl()
+ movl PARAM_CARRY,%eax
+ shrl $1,%eax C shift bit 0 into carry
+ jmp LF(M4_function_n,oop)
+
+L(endgo):
+deflit(`FRAME',16)
+ movl PARAM_CARRY,%eax
+ shrl $1,%eax C shift bit 0 into carry
+ jmp LF(M4_function_n,end)
+
+EPILOGUE()
+
+
+ ALIGN(8)
+PROLOGUE(M4_function_n)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC1,%esi
+ movl PARAM_SRC2,%ebp
+ movl PARAM_SIZE,%ecx
+
+ movl (%ebp),%ebx
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx C zero carry flag
+ jz L(end)
+ pushl %edx
+FRAME_pushl()
+
+ ALIGN(8)
+L(oop): movl 28(%edi),%eax C fetch destination cache line
+ leal 32(%edi),%edi
+
+L(1): movl (%esi),%eax
+ movl 4(%esi),%edx
+ M4_inst %ebx,%eax
+ movl 4(%ebp),%ebx
+ M4_inst %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L(2): movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ M4_inst %ebx,%eax
+ movl 12(%ebp),%ebx
+ M4_inst %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L(3): movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ M4_inst %ebx,%eax
+ movl 20(%ebp),%ebx
+ M4_inst %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L(4): movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ M4_inst %ebx,%eax
+ movl 28(%ebp),%ebx
+ M4_inst %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
+ decl %ecx
+ jnz L(oop)
+
+ popl %edx
+FRAME_popl()
+L(end):
+ decl %edx C test %edx w/o clobbering carry
+ js L(end2)
+ incl %edx
+L(oop2):
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ M4_inst %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
+ jnz L(oop2)
+L(end2):
+ movl (%esi),%eax
+ M4_inst %ebx,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/aorsmul_1.asm b/rts/gmp/mpn/x86/pentium/aorsmul_1.asm
new file mode 100644
index 0000000000..147b55610f
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/aorsmul_1.asm
@@ -0,0 +1,99 @@
+dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication.
+dnl
+dnl P5: 14.0 cycles/limb
+
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA. */
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_addmul_1', `
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+
+',`ifdef(`OPERATION_submul_1', `
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(M4_function_1)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST, %edi
+ movl PARAM_SRC, %esi
+ movl PARAM_SIZE, %ecx
+ movl PARAM_MULTIPLIER, %ebp
+
+ leal (%edi,%ecx,4), %edi
+ leal (%esi,%ecx,4), %esi
+ negl %ecx
+ xorl %ebx, %ebx
+ ALIGN(8)
+
+L(oop): adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl (%edi,%ecx,4), %ebx
+
+ adcl $0, %edx
+ M4_inst %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(oop)
+
+ adcl $0, %ebx
+ movl %ebx, %eax
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/diveby3.asm b/rts/gmp/mpn/x86/pentium/diveby3.asm
new file mode 100644
index 0000000000..dbac81642f
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/diveby3.asm
@@ -0,0 +1,183 @@
+dnl Intel P5 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl P5: 15.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+dnl ceil(b/3), ceil(b*2/3) and floor(b*2/3) where b=2^32
+deflit(ONE_THIRD_CEIL, 0x55555556)
+deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB)
+deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SRC, %ecx
+ movl PARAM_SIZE, %edx
+
+ decl %edx
+ jnz L(two_or_more)
+
+ movl (%ecx), %edx
+ movl PARAM_CARRY, %eax C risk of cache bank clash here
+
+ movl PARAM_DST, %ecx
+ subl %eax, %edx
+
+ sbbl %eax, %eax C 0 or -1
+
+ imull $INVERSE_3, %edx, %edx
+
+ negl %eax C 0 or 1
+ cmpl $ONE_THIRD_CEIL, %edx
+
+ sbbl $-1, %eax C +1 if edx>=ceil(b/3)
+ cmpl $TWO_THIRDS_CEIL, %edx
+
+ sbbl $-1, %eax C +1 if edx>=ceil(b*2/3)
+ movl %edx, (%ecx)
+
+ ret
+
+
+L(two_or_more):
+ C eax
+ C ebx
+ C ecx src
+ C edx size-1
+ C esi
+ C edi
+ C ebp
+
+ pushl %ebx FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ pushl %edi FRAME_pushl()
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_DST, %edi
+ movl PARAM_CARRY, %esi
+
+ movl (%ecx), %eax C src low limb
+ xorl %ebx, %ebx
+
+ sub %esi, %eax
+ movl $TWO_THIRDS_FLOOR, %esi
+
+ leal (%ecx,%edx,4), %ecx C &src[size-1]
+ leal (%edi,%edx,4), %edi C &dst[size-1]
+
+ adcl $0, %ebx C carry, 0 or 1
+ negl %edx C -(size-1)
+
+
+C The loop needs a source limb ready at the top, which leads to one limb
+C handled separately at the end, and the special case above for size==1.
+C There doesn't seem to be any scheduling that would keep the speed but move
+C the source load and carry subtract up to the top.
+C
+C The destination cache line prefetching adds 1 cycle to the loop but is
+C considered worthwhile. The slowdown is a factor of 1.07, but will prevent
+C repeated write-throughs if the destination isn't in L1. A version using
+C an outer loop to prefetch only every 8 limbs (a cache line) proved to be
+C no faster, due to unavoidable branch mispreditions in the inner loop.
+C
+C setc is 2 cycles on P54, so an adcl is used instead. If the movl $0,%ebx
+C could be avoided then the src limb fetch could pair up and save a cycle.
+C This would probably mean going to a two limb loop with the carry limb
+C alternately positive or negative, since an sbbl %ebx,%ebx will leave a
+C value which is in the opposite sense to the preceding sbbl/adcl %ebx,%eax.
+C
+C A register is used for TWO_THIRDS_FLOOR because a cmp can't be done as
+C "cmpl %edx, $n" with the immediate as the second operand.
+C
+C The "4" source displacement is in the loop rather than the setup because
+C this gets L(top) aligned to 8 bytes at no cost.
+
+ ALIGN(8)
+L(top):
+ C eax source limb, carry subtracted
+ C ebx carry (0 or 1)
+ C ecx &src[size-1]
+ C edx counter, limbs, negative
+ C esi TWO_THIRDS_FLOOR
+ C edi &dst[size-1]
+ C ebp scratch (result limb)
+
+ imull $INVERSE_3, %eax, %ebp
+
+ cmpl $ONE_THIRD_CEIL, %ebp
+ movl (%edi,%edx,4), %eax C dst cache line prefetch
+
+ sbbl $-1, %ebx C +1 if ebp>=ceil(b/3)
+ cmpl %ebp, %esi
+
+ movl 4(%ecx,%edx,4), %eax C next src limb
+
+ sbbl %ebx, %eax C and further -1 if ebp>=ceil(b*2/3)
+ movl $0, %ebx
+
+ adcl $0, %ebx C new carry
+ movl %ebp, (%edi,%edx,4)
+
+ incl %edx
+ jnz L(top)
+
+
+
+ imull $INVERSE_3, %eax, %edx
+
+ cmpl $ONE_THIRD_CEIL, %edx
+ movl %edx, (%edi)
+
+ sbbl $-1, %ebx C +1 if edx>=ceil(b/3)
+ cmpl $TWO_THIRDS_CEIL, %edx
+
+ sbbl $-1, %ebx C +1 if edx>=ceil(b*2/3)
+ popl %ebp
+
+ movl %ebx, %eax
+ popl %edi
+
+ popl %esi
+ popl %ebx
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/gmp-mparam.h b/rts/gmp/mpn/x86/pentium/gmp-mparam.h
new file mode 100644
index 0000000000..d3ed3d73ce
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/gmp-mparam.h
@@ -0,0 +1,97 @@
+/* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 9 /* cycles */
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 41 /* cycles */
+#endif
+
+/* bsf takes 18-42 cycles, put an average for uniform random numbers */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 20 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 14
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 179
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 22
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 153
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 46
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 110
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 13
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 25
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 496, 928, 1920, 4608, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 512
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 3840
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 496, 1184, 1920, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 512
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 3840
+#endif
diff --git a/rts/gmp/mpn/x86/pentium/lshift.asm b/rts/gmp/mpn/x86/pentium/lshift.asm
new file mode 100644
index 0000000000..e1e35d4c57
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/lshift.asm
@@ -0,0 +1,236 @@
+dnl Intel Pentium mpn_lshift -- mpn left shift.
+dnl
+dnl cycles/limb
+dnl P5,P54: 6.0
+dnl P55: 5.375
+
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
+C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ebp
+ movl PARAM_SHIFT,%ecx
+
+C We can use faster code for shift-by-1 under certain conditions.
+ cmp $1,%ecx
+ jne L(normal)
+ leal 4(%esi),%eax
+ cmpl %edi,%eax
+ jnc L(special) C jump if s_ptr + 1 >= res_ptr
+ leal (%esi,%ebp,4),%eax
+ cmpl %eax,%edi
+ jnc L(special) C jump if res_ptr >= s_ptr + size
+
+L(normal):
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+ xorl %eax,%eax
+ shldl( %cl, %edx, %eax) C compute carry limb
+ pushl %eax C push carry limb onto stack
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz L(end)
+
+ movl (%edi),%eax C fetch destination cache line
+
+ ALIGN(4)
+L(oop): movl -28(%edi),%eax C fetch destination cache line
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ shldl( %cl, %eax, %ebx)
+ shldl( %cl, %edx, %eax)
+ movl %ebx,(%edi)
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ shldl( %cl, %ebx, %edx)
+ shldl( %cl, %eax, %ebx)
+ movl %edx,-8(%edi)
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ shldl( %cl, %edx, %eax)
+ shldl( %cl, %ebx, %edx)
+ movl %eax,-16(%edi)
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ shldl( %cl, %eax, %ebx)
+ shldl( %cl, %edx, %eax)
+ movl %ebx,-24(%edi)
+ movl %eax,-28(%edi)
+
+ subl $32,%esi
+ subl $32,%edi
+ decl %ebp
+ jnz L(oop)
+
+L(end): popl %ebp
+ andl $7,%ebp
+ jz L(end2)
+L(oop2):
+ movl (%esi),%eax
+ shldl( %cl,%eax,%edx)
+ movl %edx,(%edi)
+ movl %eax,%edx
+ subl $4,%esi
+ subl $4,%edi
+ decl %ebp
+ jnz L(oop2)
+
+L(end2):
+ shll %cl,%edx C compute least significant limb
+ movl %edx,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
+C We loop from least significant end of the arrays, which is only
+C permissable if the source and destination don't overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(special):
+ movl (%esi),%edx
+ addl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ addl %edx,%edx
+ incl %ebp
+ decl %ebp
+ jz L(Lend)
+
+ movl (%edi),%eax C fetch destination cache line
+
+ ALIGN(4)
+L(Loop):
+ movl 28(%edi),%eax C fetch destination cache line
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,(%edi)
+ adcl %edx,%edx
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ adcl %ebx,%ebx
+ movl %edx,8(%edi)
+ adcl %eax,%eax
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ adcl %edx,%edx
+ movl %eax,16(%edi)
+ adcl %ebx,%ebx
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,24(%edi)
+ adcl %edx,%edx
+ movl %eax,28(%edi)
+
+ leal 32(%esi),%esi C use leal not to clobber carry
+ leal 32(%edi),%edi
+ decl %ebp
+ jnz L(Loop)
+
+L(Lend):
+ popl %ebp
+ sbbl %eax,%eax C save carry in %eax
+ andl $7,%ebp
+ jz L(Lend2)
+ addl %eax,%eax C restore carry from eax
+L(Loop2):
+ movl %edx,%ebx
+ movl (%esi),%edx
+ adcl %edx,%edx
+ movl %ebx,(%edi)
+
+ leal 4(%esi),%esi C use leal not to clobber carry
+ leal 4(%edi),%edi
+ decl %ebp
+ jnz L(Loop2)
+
+ jmp L(L1)
+L(Lend2):
+ addl %eax,%eax C restore carry from eax
+L(L1): movl %edx,(%edi) C store last limb
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h b/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
new file mode 100644
index 0000000000..2379077d0c
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
@@ -0,0 +1,97 @@
+/* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 9 /* cycles */
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 41 /* cycles */
+#endif
+
+/* bsf takes 18-42 cycles, put an average for uniform random numbers */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 20 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 14
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 99
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 22
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 89
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 40
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 98
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 13
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 5
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 25
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 496, 1056, 1920, 4608, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 512
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 3840
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 496, 1184, 2176, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 512
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 4352
+#endif
diff --git a/rts/gmp/mpn/x86/pentium/mmx/lshift.asm b/rts/gmp/mpn/x86/pentium/mmx/lshift.asm
new file mode 100644
index 0000000000..2225438658
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/lshift.asm
@@ -0,0 +1,455 @@
+dnl Intel P5 mpn_lshift -- mpn left shift.
+dnl
+dnl P5: 1.75 cycles/limb.
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size left by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the right. Return the bits shifted out at the
+C left.
+C
+C The comments in mpn_rshift apply here too.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl minimum 5, because the unrolled loop can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_lshift)
+
+ pushl %ebx
+ pushl %edi
+deflit(`FRAME',8)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_DST, %edx
+
+ movl PARAM_SRC, %ebx
+ movl PARAM_SHIFT, %ecx
+
+ cmp $UNROLL_THRESHOLD, %eax
+ jae L(unroll)
+
+ movl -4(%ebx,%eax,4), %edi C src high limb
+ decl %eax
+
+ jnz L(simple)
+
+ shldl( %cl, %edi, %eax) C eax was decremented to zero
+
+ shll %cl, %edi
+
+ movl %edi, (%edx) C dst low limb
+ popl %edi C risk of data cache bank clash
+
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(simple):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd (%ebx,%eax,4), %mm5 C src high limb
+
+ movd %ecx, %mm6 C lshift
+ negl %ecx
+
+ psllq %mm6, %mm5
+ addl $32, %ecx
+
+ movd %ecx, %mm7
+ psrlq $32, %mm5 C retval
+
+
+L(simple_top):
+ C eax counter, limbs, negative
+ C ebx src
+ C ecx
+ C edx dst
+ C esi
+ C edi
+ C
+ C mm0 scratch
+ C mm5 return value
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%ebx,%eax,4), %mm0
+ decl %eax
+
+ psrlq %mm7, %mm0
+
+ C
+
+ movd %mm0, 4(%edx,%eax,4)
+ jnz L(simple_top)
+
+
+ movd (%ebx), %mm0
+
+ movd %mm5, %eax
+ psllq %mm6, %mm0
+
+ popl %edi
+ popl %ebx
+
+ movd %mm0, (%edx)
+
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(unroll):
+ C eax size
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd -4(%ebx,%eax,4), %mm5 C src high limb
+ leal (%ebx,%eax,4), %edi
+
+ movd %ecx, %mm6 C lshift
+ andl $4, %edi
+
+ psllq %mm6, %mm5
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process high limb separately (marked xxx) to
+ C make it so.
+ C
+ C source -8(ebx,%eax,4)
+ C |
+ C +-------+-------+-------+--
+ C | |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+ C
+ C dest
+ C -4(edx,%eax,4)
+ C |
+ C +-------+-------+--
+ C | xxx | |
+ C +-------+-------+--
+
+ movq -8(%ebx,%eax,4), %mm0 C unaligned load
+
+ psllq %mm6, %mm0
+ decl %eax
+
+ psrlq $32, %mm0
+
+ C
+
+ movd %mm0, (%edx,%eax,4)
+L(start_src_aligned):
+
+ movq -8(%ebx,%eax,4), %mm1 C src high qword
+ leal (%edx,%eax,4), %edi
+
+ andl $4, %edi
+ psrlq $32, %mm5 C return value
+
+ movq -16(%ebx,%eax,4), %mm3 C src second highest qword
+ jz L(start_dst_aligned)
+
+ C dst isn't aligned, subtract 4 to make it so, and pretend the shift
+ C is 32 bits extra. High limb of dst (marked xxx) handled here
+ C separately.
+ C
+ C source -8(ebx,%eax,4)
+ C |
+ C +-------+-------+--
+ C | mm1 |
+ C +-------+-------+--
+ C 0mod8 4mod8
+ C
+ C dest
+ C -4(edx,%eax,4)
+ C |
+ C +-------+-------+-------+--
+ C | xxx | |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+
+ movq %mm1, %mm0
+ addl $32, %ecx C new shift
+
+ psllq %mm6, %mm0
+
+ movd %ecx, %mm6
+ psrlq $32, %mm0
+
+ C wasted cycle here waiting for %mm0
+
+ movd %mm0, -4(%edx,%eax,4)
+ subl $4, %edx
+L(start_dst_aligned):
+
+
+ psllq %mm6, %mm1
+ negl %ecx C -shift
+
+ addl $64, %ecx C 64-shift
+ movq %mm3, %mm2
+
+ movd %ecx, %mm7
+ subl $8, %eax C size-8
+
+ psrlq %mm7, %mm3
+
+ por %mm1, %mm3 C mm3 ready to store
+ jc L(finish)
+
+
+ C The comments in mpn_rshift apply here too.
+
+ ALIGN(8)
+L(unroll_loop):
+ C eax counter, limbs
+ C ebx src
+ C ecx
+ C edx dst
+ C esi
+ C edi
+ C
+ C mm0
+ C mm1
+ C mm2 src qword from 48(%ebx,%eax,4)
+ C mm3 dst qword ready to store to 56(%edx,%eax,4)
+ C
+ C mm5 return value
+ C mm6 lshift
+ C mm7 rshift
+
+ movq 8(%ebx,%eax,4), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ movq %mm3, 24(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq (%ebx,%eax,4), %mm3 C
+ psllq %mm6, %mm1 C
+
+ movq %mm0, 16(%edx,%eax,4)
+ movq %mm3, %mm2 C
+
+ psrlq %mm7, %mm3 C
+ subl $4, %eax
+
+ por %mm1, %mm3 C
+ jnc L(unroll_loop)
+
+
+
+L(finish):
+ C eax -4 to -1 representing respectively 0 to 3 limbs remaining
+
+ testb $2, %al
+
+ jz L(finish_no_two)
+
+ movq 8(%ebx,%eax,4), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ movq %mm3, 24(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq %mm1, %mm2
+ movq %mm0, %mm3
+
+ subl $2, %eax
+L(finish_no_two):
+
+
+ C eax -4 or -3 representing respectively 0 or 1 limbs remaining
+ C
+ C mm2 src prev qword, from 48(%ebx,%eax,4)
+ C mm3 dst qword, for 56(%edx,%eax,4)
+
+ testb $1, %al
+ movd %mm5, %eax C retval
+
+ popl %edi
+ jz L(finish_zero)
+
+
+ C One extra src limb, destination was aligned.
+ C
+ C source ebx
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edx+12 edx+4 edx
+ C --+---------------+---------------+-------+
+ C | mm3 | | |
+ C --+---------------+---------------+-------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C One extra src limb, destination was unaligned.
+ C
+ C source ebx
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edx+12 edx+4
+ C --+---------------+---------------+
+ C | mm3 | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword at 4(%edx), and in the aligned case
+ C there's an extra limb of dst to be formed from that extra src limb
+ C left shifted.
+
+
+ movd (%ebx), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm3, 12(%edx)
+ psllq $32, %mm0
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm2, %mm0
+ psllq %mm6, %mm1
+
+ movq %mm0, 4(%edx)
+ psrlq $32, %mm1
+
+ andl $32, %ecx
+ popl %ebx
+
+ jz L(finish_one_unaligned)
+
+ movd %mm1, (%edx)
+L(finish_one_unaligned):
+
+ emms
+
+ ret
+
+
+L(finish_zero):
+
+ C No extra src limbs, destination was aligned.
+ C
+ C source ebx
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edx+8 edx
+ C --+---------------+---------------+
+ C | mm3 | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C No extra src limbs, destination was unaligned.
+ C
+ C source ebx
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edx+8 edx+4
+ C --+---------------+-------+
+ C | mm3 | |
+ C --+---------------+-------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C The movd for the unaligned case writes the same data to 4(%edx)
+ C that the movq does for the aligned case.
+
+
+ movq %mm3, 8(%edx)
+ andl $32, %ecx
+
+ psllq %mm6, %mm2
+ jz L(finish_zero_unaligned)
+
+ movq %mm2, (%edx)
+L(finish_zero_unaligned):
+
+ psrlq $32, %mm2
+ popl %ebx
+
+ movd %mm5, %eax C retval
+
+ movd %mm2, 4(%edx)
+
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mmx/popham.asm b/rts/gmp/mpn/x86/pentium/mmx/popham.asm
new file mode 100644
index 0000000000..587a07ab3d
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/popham.asm
@@ -0,0 +1,30 @@
+dnl Intel P55 mpn_popcount, mpn_hamdist -- population count and hamming
+dnl distance.
+dnl
+dnl P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/rts/gmp/mpn/x86/pentium/mmx/rshift.asm b/rts/gmp/mpn/x86/pentium/mmx/rshift.asm
new file mode 100644
index 0000000000..7672630d57
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/rshift.asm
@@ -0,0 +1,460 @@
+dnl Intel P5 mpn_rshift -- mpn right shift.
+dnl
+dnl P5: 1.75 cycles/limb.
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size right by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the left. Return the bits shifted out at the
+C right.
+C
+C It takes 6 mmx instructions to process 2 limbs, making 1.5 cycles/limb,
+C and with a 4 limb loop and 1 cycle of loop overhead the total is 1.75 c/l.
+C
+C Full speed depends on source and destination being aligned. Unaligned mmx
+C loads and stores on P5 don't pair and have a 2 cycle penalty. Some hairy
+C setups and finish-ups are done to ensure alignment for the loop.
+C
+C MMX shifts work out a bit faster even for the simple loop.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl Minimum 5, because the unrolled loop can't handle less.
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_rshift)
+
+ pushl %ebx
+ pushl %edi
+deflit(`FRAME',8)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_DST, %edx
+
+ movl PARAM_SRC, %ebx
+ movl PARAM_SHIFT, %ecx
+
+ cmp $UNROLL_THRESHOLD, %eax
+ jae L(unroll)
+
+ decl %eax
+ movl (%ebx), %edi C src low limb
+
+ jnz L(simple)
+
+ shrdl( %cl, %edi, %eax) C eax was decremented to zero
+
+ shrl %cl, %edi
+
+ movl %edi, (%edx) C dst low limb
+ popl %edi C risk of data cache bank clash
+
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(simple):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd (%ebx), %mm5 C src[0]
+ leal (%ebx,%eax,4), %ebx C &src[size-1]
+
+ movd %ecx, %mm6 C rshift
+ leal -4(%edx,%eax,4), %edx C &dst[size-2]
+
+ psllq $32, %mm5
+ negl %eax
+
+
+C This loop is 5 or 8 cycles, with every second load unaligned and a wasted
+C cycle waiting for the mm0 result to be ready. For comparison a shrdl is 4
+C cycles and would be 8 in a simple loop. Using mmx helps the return value
+C and last limb calculations too.
+
+L(simple_top):
+ C eax counter, limbs, negative
+ C ebx &src[size-1]
+ C ecx return value
+ C edx &dst[size-2]
+ C
+ C mm0 scratch
+ C mm5 return value
+ C mm6 shift
+
+ movq (%ebx,%eax,4), %mm0
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+ movd %mm0, (%edx,%eax,4)
+ jnz L(simple_top)
+
+
+ movd (%ebx), %mm0
+ psrlq %mm6, %mm5 C return value
+
+ psrlq %mm6, %mm0
+ popl %edi
+
+ movd %mm5, %eax
+ popl %ebx
+
+ movd %mm0, 4(%edx)
+
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(unroll):
+ C eax size
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd (%ebx), %mm5 C src[0]
+ movl $4, %edi
+
+ movd %ecx, %mm6 C rshift
+ testl %edi, %ebx
+
+ psllq $32, %mm5
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process low limb separately (marked xxx) and
+ C step src and dst by one limb, making src aligned.
+ C
+ C source ebx
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+ C
+ C dest edx
+ C --+-------+-------+
+ C | | xxx |
+ C --+-------+-------+
+
+ movq (%ebx), %mm0 C unaligned load
+
+ psrlq %mm6, %mm0
+ addl $4, %ebx
+
+ decl %eax
+
+ movd %mm0, (%edx)
+ addl $4, %edx
+L(start_src_aligned):
+
+
+ movq (%ebx), %mm1
+ testl %edi, %edx
+
+ psrlq %mm6, %mm5 C retval
+ jz L(start_dst_aligned)
+
+ C dst isn't aligned, add 4 to make it so, and pretend the shift is
+ C 32 bits extra. Low limb of dst (marked xxx) handled here
+ C separately.
+ C
+ C source ebx
+ C --+-------+-------+
+ C | mm1 |
+ C --+-------+-------+
+ C 4mod8 0mod8
+ C
+ C dest edx
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+
+ movq %mm1, %mm0
+ addl $32, %ecx C new shift
+
+ psrlq %mm6, %mm0
+
+ movd %ecx, %mm6
+
+ movd %mm0, (%edx)
+ addl $4, %edx
+L(start_dst_aligned):
+
+
+ movq 8(%ebx), %mm3
+ negl %ecx
+
+ movq %mm3, %mm2 C mm2 src qword
+ addl $64, %ecx
+
+ movd %ecx, %mm7
+ psrlq %mm6, %mm1
+
+ leal -12(%ebx,%eax,4), %ebx
+ leal -20(%edx,%eax,4), %edx
+
+ psllq %mm7, %mm3
+ subl $7, %eax C size-7
+
+ por %mm1, %mm3 C mm3 ready to store
+ negl %eax C -(size-7)
+
+ jns L(finish)
+
+
+ C This loop is the important bit, the rest is just support. Careful
+ C instruction scheduling achieves the claimed 1.75 c/l. The
+ C relevant parts of the pairing rules are:
+ C
+ C - mmx loads and stores execute only in the U pipe
+ C - only one mmx shift in a pair
+ C - wait one cycle before storing an mmx register result
+ C - the usual address generation interlock
+ C
+ C Two qword calculations are slightly interleaved. The instructions
+ C marked "C" belong to the second qword, and the "C prev" one is for
+ C the second qword from the previous iteration.
+
+ ALIGN(8)
+L(unroll_loop):
+ C eax counter, limbs, negative
+ C ebx &src[size-12]
+ C ecx
+ C edx &dst[size-12]
+ C esi
+ C edi
+ C
+ C mm0
+ C mm1
+ C mm2 src qword from -8(%ebx,%eax,4)
+ C mm3 dst qword ready to store to -8(%edx,%eax,4)
+ C
+ C mm5 return value
+ C mm6 rshift
+ C mm7 lshift
+
+ movq (%ebx,%eax,4), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ movq %mm3, -8(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq 8(%ebx,%eax,4), %mm3 C
+ psrlq %mm6, %mm1 C
+
+ movq %mm0, (%edx,%eax,4)
+ movq %mm3, %mm2 C
+
+ psllq %mm7, %mm3 C
+ addl $4, %eax
+
+ por %mm1, %mm3 C
+ js L(unroll_loop)
+
+
+L(finish):
+ C eax 0 to 3 representing respectively 3 to 0 limbs remaining
+
+ testb $2, %al
+
+ jnz L(finish_no_two)
+
+ movq (%ebx,%eax,4), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ movq %mm3, -8(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq %mm1, %mm2
+ movq %mm0, %mm3
+
+ addl $2, %eax
+L(finish_no_two):
+
+
+ C eax 2 or 3 representing respectively 1 or 0 limbs remaining
+ C
+ C mm2 src prev qword, from -8(%ebx,%eax,4)
+ C mm3 dst qword, for -8(%edx,%eax,4)
+
+ testb $1, %al
+ popl %edi
+
+ movd %mm5, %eax C retval
+ jnz L(finish_zero)
+
+
+ C One extra limb, destination was aligned.
+ C
+ C source ebx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edx
+ C +-------+---------------+---------------+--
+ C | | | mm3 |
+ C +-------+---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C One extra limb, destination was unaligned.
+ C
+ C source ebx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edx
+ C +---------------+---------------+--
+ C | | mm3 |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword at 8(%edx), and in the aligned case
+ C there's a further extra limb of dst to be formed.
+
+
+ movd 8(%ebx), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ movq %mm3, (%edx)
+ por %mm2, %mm0
+
+ psrlq %mm6, %mm1
+ andl $32, %ecx
+
+ popl %ebx
+ jz L(finish_one_unaligned)
+
+ C dst was aligned, must store one extra limb
+ movd %mm1, 16(%edx)
+L(finish_one_unaligned):
+
+ movq %mm0, 8(%edx)
+
+ emms
+
+ ret
+
+
+L(finish_zero):
+
+ C No extra limbs, destination was aligned.
+ C
+ C source ebx
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edx+4
+ C +---------------+---------------+--
+ C | | mm3 |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C No extra limbs, destination was unaligned.
+ C
+ C source ebx
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edx+4
+ C +-------+---------------+--
+ C | | mm3 |
+ C +-------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = 64-(shift+32)
+
+
+ C The movd for the unaligned case is clearly the same data as the
+ C movq for the aligned case, it's just a choice between whether one
+ C or two limbs should be written.
+
+
+ movq %mm3, 4(%edx)
+ psrlq %mm6, %mm2
+
+ movd %mm2, 12(%edx)
+ andl $32, %ecx
+
+ popl %ebx
+ jz L(finish_zero_unaligned)
+
+ movq %mm2, 12(%edx)
+L(finish_zero_unaligned):
+
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mul_1.asm b/rts/gmp/mpn/x86/pentium/mul_1.asm
new file mode 100644
index 0000000000..08639eca09
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mul_1.asm
@@ -0,0 +1,79 @@
+dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
+dnl
+dnl P5: 13.0 cycles/limb
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA. */
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_mul_1)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST, %edi
+ movl PARAM_SRC, %esi
+ movl PARAM_SIZE, %ecx
+ movl PARAM_MULTIPLIER, %ebp
+
+ leal (%edi,%ecx,4), %edi
+ leal (%esi,%ecx,4), %esi
+ negl %ecx
+ xorl %ebx, %ebx
+ ALIGN(8)
+
+L(oop): adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(oop)
+
+ adcl $0, %ebx
+ movl %ebx, %eax
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mul_basecase.asm b/rts/gmp/mpn/x86/pentium/mul_basecase.asm
new file mode 100644
index 0000000000..d9f79a0831
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mul_basecase.asm
@@ -0,0 +1,135 @@
+dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
+dnl
+dnl P5: 14.2 cycles/crossproduct (approx)
+
+
+dnl Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+
+defframe(PARAM_YSIZE, 20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE, 12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+defframe(VAR_COUNTER, -4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_mul_basecase)
+
+ pushl %eax C dummy push for allocating stack slot
+ pushl %esi
+ pushl %ebp
+ pushl %edi
+deflit(`FRAME',16)
+
+ movl PARAM_XP,%esi
+ movl PARAM_WP,%edi
+ movl PARAM_YP,%ebp
+
+ movl (%esi),%eax C load xp[0]
+ mull (%ebp) C multiply by yp[0]
+ movl %eax,(%edi) C store to wp[0]
+ movl PARAM_XSIZE,%ecx C xsize
+ decl %ecx C If xsize = 1, ysize = 1 too
+ jz L(done)
+
+ movl PARAM_XSIZE,%eax
+ pushl %ebx
+FRAME_pushl()
+ movl %edx,%ebx
+ leal (%esi,%eax,4),%esi C make xp point at end
+ leal (%edi,%eax,4),%edi C offset wp by xsize
+ negl %ecx C negate j size/index for inner loop
+ xorl %eax,%eax C clear carry
+
+ ALIGN(8)
+L(oop1): adcl $0,%ebx
+ movl (%esi,%ecx,4),%eax C load next limb at xp[j]
+ mull (%ebp)
+ addl %ebx,%eax
+ movl %eax,(%edi,%ecx,4)
+ incl %ecx
+ movl %edx,%ebx
+ jnz L(oop1)
+
+ adcl $0,%ebx
+ movl PARAM_YSIZE,%eax
+ movl %ebx,(%edi) C most significant limb of product
+ addl $4,%edi C increment wp
+ decl %eax
+ jz L(skip)
+ movl %eax,VAR_COUNTER C set index i to ysize
+
+L(outer):
+ addl $4,%ebp C make ebp point to next y limb
+ movl PARAM_XSIZE,%ecx
+ negl %ecx
+ xorl %ebx,%ebx
+
+ C code at 0x61 here, close enough to aligned
+L(oop2):
+ adcl $0,%ebx
+ movl (%esi,%ecx,4),%eax
+ mull (%ebp)
+ addl %ebx,%eax
+ movl (%edi,%ecx,4),%ebx
+ adcl $0,%edx
+ addl %eax,%ebx
+ movl %ebx,(%edi,%ecx,4)
+ incl %ecx
+ movl %edx,%ebx
+ jnz L(oop2)
+
+ adcl $0,%ebx
+
+ movl %ebx,(%edi)
+ addl $4,%edi
+ movl VAR_COUNTER,%eax
+ decl %eax
+ movl %eax,VAR_COUNTER
+ jnz L(outer)
+
+L(skip):
+ popl %ebx
+ popl %edi
+ popl %ebp
+ popl %esi
+ addl $4,%esp
+ ret
+
+L(done):
+ movl %edx,4(%edi) C store to wp[1]
+ popl %edi
+ popl %ebp
+ popl %esi
+ popl %eax C dummy pop for deallocating stack slot
+ ret
+
+EPILOGUE()
+
diff --git a/rts/gmp/mpn/x86/pentium/rshift.asm b/rts/gmp/mpn/x86/pentium/rshift.asm
new file mode 100644
index 0000000000..e8f5ae8ec8
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/rshift.asm
@@ -0,0 +1,236 @@
+dnl Intel Pentium mpn_rshift -- mpn right shift.
+dnl
+dnl cycles/limb
+dnl P5,P54: 6.0
+dnl P55: 5.375
+
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
+C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ebp
+ movl PARAM_SHIFT,%ecx
+
+C We can use faster code for shift-by-1 under certain conditions.
+ cmp $1,%ecx
+ jne L(normal)
+ leal 4(%edi),%eax
+ cmpl %esi,%eax
+ jnc L(special) C jump if res_ptr + 1 >= s_ptr
+ leal (%edi,%ebp,4),%eax
+ cmpl %eax,%esi
+ jnc L(special) C jump if s_ptr >= res_ptr + size
+
+L(normal):
+ movl (%esi),%edx
+ addl $4,%esi
+ xorl %eax,%eax
+ shrdl( %cl, %edx, %eax) C compute carry limb
+ pushl %eax C push carry limb onto stack
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz L(end)
+
+ movl (%edi),%eax C fetch destination cache line
+
+ ALIGN(4)
+L(oop): movl 28(%edi),%eax C fetch destination cache line
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ shrdl( %cl, %eax, %ebx)
+ shrdl( %cl, %edx, %eax)
+ movl %ebx,(%edi)
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ shrdl( %cl, %ebx, %edx)
+ shrdl( %cl, %eax, %ebx)
+ movl %edx,8(%edi)
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ shrdl( %cl, %edx, %eax)
+ shrdl( %cl, %ebx, %edx)
+ movl %eax,16(%edi)
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ shrdl( %cl, %eax, %ebx)
+ shrdl( %cl, %edx, %eax)
+ movl %ebx,24(%edi)
+ movl %eax,28(%edi)
+
+ addl $32,%esi
+ addl $32,%edi
+ decl %ebp
+ jnz L(oop)
+
+L(end): popl %ebp
+ andl $7,%ebp
+ jz L(end2)
+L(oop2):
+ movl (%esi),%eax
+ shrdl( %cl,%eax,%edx) C compute result limb
+ movl %edx,(%edi)
+ movl %eax,%edx
+ addl $4,%esi
+ addl $4,%edi
+ decl %ebp
+ jnz L(oop2)
+
+L(end2):
+ shrl %cl,%edx C compute most significant limb
+ movl %edx,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
+C We loop from least significant end of the arrays, which is only
+C permissable if the source and destination don't overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(special):
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ shrl %edx
+ incl %ebp
+ decl %ebp
+ jz L(Lend)
+
+ movl (%edi),%eax C fetch destination cache line
+
+ ALIGN(4)
+L(Loop):
+ movl -28(%edi),%eax C fetch destination cache line
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ rcrl %eax
+ movl %ebx,(%edi)
+ rcrl %edx
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ rcrl %ebx
+ movl %edx,-8(%edi)
+ rcrl %eax
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ rcrl %edx
+ movl %eax,-16(%edi)
+ rcrl %ebx
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ rcrl %eax
+ movl %ebx,-24(%edi)
+ rcrl %edx
+ movl %eax,-28(%edi)
+
+ leal -32(%esi),%esi C use leal not to clobber carry
+ leal -32(%edi),%edi
+ decl %ebp
+ jnz L(Loop)
+
+L(Lend):
+ popl %ebp
+ sbbl %eax,%eax C save carry in %eax
+ andl $7,%ebp
+ jz L(Lend2)
+ addl %eax,%eax C restore carry from eax
+L(Loop2):
+ movl %edx,%ebx
+ movl (%esi),%edx
+ rcrl %edx
+ movl %ebx,(%edi)
+
+ leal -4(%esi),%esi C use leal not to clobber carry
+ leal -4(%edi),%edi
+ decl %ebp
+ jnz L(Loop2)
+
+ jmp L(L1)
+L(Lend2):
+ addl %eax,%eax C restore carry from eax
+L(L1): movl %edx,(%edi) C store last limb
+
+ movl $0,%eax
+ rcrl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/sqr_basecase.asm b/rts/gmp/mpn/x86/pentium/sqr_basecase.asm
new file mode 100644
index 0000000000..c8584df13c
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/sqr_basecase.asm
@@ -0,0 +1,520 @@
+dnl Intel P5 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular
+dnl product at around 20x20 limbs.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Calculate src,size squared, storing the result in dst,2*size.
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the size is
+C small.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %edx
+ movl PARAM_SRC, %eax
+
+ cmpl $2, %edx
+ movl PARAM_DST, %ecx
+
+ je L(two_limbs)
+
+ movl (%eax), %eax
+ ja L(three_or_more)
+
+C -----------------------------------------------------------------------------
+C one limb only
+ C eax src
+ C ebx
+ C ecx dst
+ C edx
+
+ mull %eax
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ ret
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx dst
+ C edx size
+
+ pushl %ebp
+ pushl %edi
+
+ pushl %esi
+ pushl %ebx
+
+ movl %eax, %ebx
+ movl (%eax), %eax
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx) C dst[0]
+ movl %edx, %esi C dst[1]
+
+ movl 4(%ebx), %eax
+
+ mull %eax C src[1]^2
+
+ movl %eax, %edi C dst[2]
+ movl %edx, %ebp C dst[3]
+
+ movl (%ebx), %eax
+
+ mull 4(%ebx) C src[0]*src[1]
+
+ addl %eax, %esi
+ popl %ebx
+
+ adcl %edx, %edi
+
+ adcl $0, %ebp
+ addl %esi, %eax
+
+ adcl %edi, %edx
+ movl %eax, 4(%ecx)
+
+ adcl $0, %ebp
+ popl %esi
+
+ movl %edx, 8(%ecx)
+ movl %ebp, 12(%ecx)
+
+ popl %edi
+ popl %ebp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(three_or_more):
+ C eax src low limb
+ C ebx
+ C ecx dst
+ C edx size
+
+ cmpl $4, %edx
+ pushl %ebx
+deflit(`FRAME',4)
+
+ movl PARAM_SRC, %ebx
+ jae L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+ C eax src low limb
+ C ebx src
+ C ecx dst
+ C edx size
+
+ pushl %ebp
+ pushl %edi
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ movl 4(%ebx), %eax
+ xorl %ebp, %ebp
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl %edx, 12(%ecx)
+
+ movl 8(%ebx), %eax
+ pushl %esi C risk of cache bank clash
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl %edx, 20(%ecx)
+
+ movl (%ebx), %eax
+
+ mull 4(%ebx) C src[0] * src[1]
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ movl (%ebx), %eax
+
+ mull 8(%ebx) C src[0] * src[2]
+
+ addl %eax, %edi
+ movl %edx, %ebp
+
+ adcl $0, %ebp
+ movl 4(%ebx), %eax
+
+ mull 8(%ebx) C src[1] * src[2]
+
+ xorl %ebx, %ebx
+ addl %eax, %ebp
+
+ C eax
+ C ebx zero, will be dst[5]
+ C ecx dst
+ C edx dst[4]
+ C esi dst[1]
+ C edi dst[2]
+ C ebp dst[3]
+
+ adcl $0, %edx
+ addl %esi, %esi
+
+ adcl %edi, %edi
+
+ adcl %ebp, %ebp
+
+ adcl %edx, %edx
+ movl 4(%ecx), %eax
+
+ adcl $0, %ebx
+ addl %esi, %eax
+
+ movl %eax, 4(%ecx)
+ movl 8(%ecx), %eax
+
+ adcl %edi, %eax
+ movl 12(%ecx), %esi
+
+ adcl %ebp, %esi
+ movl 16(%ecx), %edi
+
+ movl %eax, 8(%ecx)
+ movl %esi, 12(%ecx)
+
+ adcl %edx, %edi
+ popl %esi
+
+ movl 20(%ecx), %eax
+ movl %edi, 16(%ecx)
+
+ popl %edi
+ popl %ebp
+
+ adcl %ebx, %eax C no carry out of this
+ popl %ebx
+
+ movl %eax, 20(%ecx)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(four_or_more):
+ C eax src low limb
+ C ebx src
+ C ecx dst
+ C edx size
+ C esi
+ C edi
+ C ebp
+ C
+ C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+deflit(`FRAME',4)
+
+ pushl %edi
+FRAME_pushl()
+ pushl %esi
+FRAME_pushl()
+
+ pushl %ebp
+FRAME_pushl()
+ leal (%ecx,%edx,4), %edi C dst end of this mul1
+
+ leal (%ebx,%edx,4), %esi C src end
+ movl %ebx, %ebp C src
+
+ negl %edx C -size
+ xorl %ebx, %ebx C clear carry limb and carry flag
+
+ leal 1(%edx), %ecx C -(size-1)
+
+L(mul1):
+ C eax scratch
+ C ebx carry
+ C ecx counter, negative
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp src
+
+ adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull (%ebp)
+
+ addl %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(mul1)
+
+
+ C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for
+ C n=1..size-2.
+ C
+ C The last two products, which are the end corner of the product
+ C triangle, are handled separately to save looping overhead. These
+ C are src[size-3]*src[size-2,size-1] and src[size-2]*src[size-1].
+ C If size is 4 then it's only these that need to be done.
+ C
+ C In the outer loop %esi is a constant, and %edi just advances by 1
+ C limb each time. The size of the operation decreases by 1 limb
+ C each time.
+
+ C eax
+ C ebx carry (needing carry flag added)
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp
+
+ adcl $0, %ebx
+ movl PARAM_SIZE, %edx
+
+ movl %ebx, (%edi)
+ subl $4, %edx
+
+ negl %edx
+ jz L(corner)
+
+
+L(outer):
+ C ebx previous carry limb to store
+ C edx outer loop counter (negative)
+ C esi &src[size]
+ C edi dst, pointing at stored carry limb of previous loop
+
+ pushl %edx C new outer loop counter
+ leal -2(%edx), %ecx
+
+ movl %ebx, (%edi)
+ addl $4, %edi
+
+ addl $4, %ebp
+ xorl %ebx, %ebx C initial carry limb, clear carry flag
+
+L(inner):
+ C eax scratch
+ C ebx carry (needing carry flag added)
+ C ecx counter, negative
+ C edx scratch
+ C esi &src[size]
+ C edi dst end of this addmul
+ C ebp &src[j]
+
+ adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull (%ebp)
+
+ addl %ebx, %eax
+ movl (%edi,%ecx,4), %ebx
+
+ adcl $0, %edx
+ addl %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(inner)
+
+
+ adcl $0, %ebx
+ popl %edx C outer loop counter
+
+ incl %edx
+ jnz L(outer)
+
+
+ movl %ebx, (%edi)
+
+L(corner):
+ C esi &src[size]
+ C edi &dst[2*size-4]
+
+ movl -8(%esi), %eax
+ movl -4(%edi), %ebx C risk of data cache bank clash here
+
+ mull -12(%esi) C src[size-2]*src[size-3]
+
+ addl %eax, %ebx
+ movl %edx, %ecx
+
+ adcl $0, %ecx
+ movl -4(%esi), %eax
+
+ mull -12(%esi) C src[size-1]*src[size-3]
+
+ addl %ecx, %eax
+ movl (%edi), %ecx
+
+ adcl $0, %edx
+ movl %ebx, -4(%edi)
+
+ addl %eax, %ecx
+ movl %edx, %ebx
+
+ adcl $0, %ebx
+ movl -4(%esi), %eax
+
+ mull -8(%esi) C src[size-1]*src[size-2]
+
+ movl %ecx, 0(%edi)
+ addl %eax, %ebx
+
+ adcl $0, %edx
+ movl PARAM_SIZE, %eax
+
+ negl %eax
+ movl %ebx, 4(%edi)
+
+ addl $1, %eax C -(size-1) and clear carry
+ movl %edx, 8(%edi)
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+L(lshift):
+ C eax counter, negative
+ C ebx next limb
+ C ecx
+ C edx
+ C esi
+ C edi &dst[2*size-4]
+ C ebp
+
+ movl 12(%edi,%eax,8), %ebx
+
+ rcll %ebx
+ movl 16(%edi,%eax,8), %ecx
+
+ rcll %ecx
+ movl %ebx, 12(%edi,%eax,8)
+
+ movl %ecx, 16(%edi,%eax,8)
+ incl %eax
+
+ jnz L(lshift)
+
+
+ adcl %eax, %eax C high bit out
+ movl PARAM_SRC, %esi
+
+ movl PARAM_SIZE, %ecx C risk of cache bank clash
+ movl %eax, 12(%edi) C dst most significant limb
+
+
+C -----------------------------------------------------------------------------
+C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+ movl (%esi), %eax C src[0]
+ leal (%esi,%ecx,4), %esi C src end
+
+ negl %ecx
+
+ mull %eax
+
+ movl %eax, 16(%edi,%ecx,8) C dst[0]
+ movl %edx, %ebx
+
+ addl $1, %ecx C size-1 and clear carry
+
+L(diag):
+ C eax scratch (low product)
+ C ebx carry limb
+ C ecx counter, negative
+ C edx scratch (high product)
+ C esi &src[size]
+ C edi &dst[2*size-4]
+ C ebp scratch (fetched dst limbs)
+
+ movl (%esi,%ecx,4), %eax
+ adcl $0, %ebx
+
+ mull %eax
+
+ movl 16-4(%edi,%ecx,8), %ebp
+
+ addl %ebp, %ebx
+ movl 16(%edi,%ecx,8), %ebp
+
+ adcl %eax, %ebp
+ movl %ebx, 16-4(%edi,%ecx,8)
+
+ movl %ebp, 16(%edi,%ecx,8)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(diag)
+
+
+ adcl $0, %edx
+ movl 16-4(%edi), %eax C dst most significant limb
+
+ addl %eax, %edx
+ popl %ebp
+
+ movl %edx, 16-4(%edi)
+ popl %esi C risk of cache bank clash
+
+ popl %edi
+ popl %ebx
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/rshift.asm b/rts/gmp/mpn/x86/rshift.asm
new file mode 100644
index 0000000000..c9881fd966
--- /dev/null
+++ b/rts/gmp/mpn/x86/rshift.asm
@@ -0,0 +1,92 @@
+dnl x86 mpn_rshift -- mpn right shift.
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+deflit(`FRAME',12)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%edx
+ movl PARAM_SHIFT,%ecx
+
+ leal -4(%edi,%edx,4),%edi
+ leal (%esi,%edx,4),%esi
+ negl %edx
+
+ movl (%esi,%edx,4),%ebx C read least significant limb
+ xorl %eax,%eax
+ shrdl( %cl, %ebx, %eax) C compute carry limb
+ incl %edx
+ jz L(end)
+ pushl %eax C push carry limb onto stack
+ testb $1,%dl
+ jnz L(1) C enter loop in the middle
+ movl %ebx,%eax
+
+ ALIGN(8)
+L(oop): movl (%esi,%edx,4),%ebx C load next higher limb
+ shrdl( %cl, %ebx, %eax) C compute result limb
+ movl %eax,(%edi,%edx,4) C store it
+ incl %edx
+L(1): movl (%esi,%edx,4),%eax
+ shrdl( %cl, %eax, %ebx)
+ movl %ebx,(%edi,%edx,4)
+ incl %edx
+ jnz L(oop)
+
+ shrl %cl,%eax C compute most significant limb
+ movl %eax,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+L(end): shrl %cl,%ebx C compute most significant limb
+ movl %ebx,(%edi) C store it
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/udiv.asm b/rts/gmp/mpn/x86/udiv.asm
new file mode 100644
index 0000000000..9fe022b107
--- /dev/null
+++ b/rts/gmp/mpn/x86/udiv.asm
@@ -0,0 +1,44 @@
+dnl x86 mpn_udiv_qrnnd -- 2 by 1 limb division
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *remptr, mp_limb_t high, mp_limb_t low,
+C mp_limb_t divisor);
+
+defframe(PARAM_DIVISOR, 16)
+defframe(PARAM_LOW, 12)
+defframe(PARAM_HIGH, 8)
+defframe(PARAM_REMPTR, 4)
+
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_udiv_qrnnd)
+deflit(`FRAME',0)
+ movl PARAM_LOW, %eax
+ movl PARAM_HIGH, %edx
+ divl PARAM_DIVISOR
+ movl PARAM_REMPTR, %ecx
+ movl %edx, (%ecx)
+ ret
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/umul.asm b/rts/gmp/mpn/x86/umul.asm
new file mode 100644
index 0000000000..3d289d1784
--- /dev/null
+++ b/rts/gmp/mpn/x86/umul.asm
@@ -0,0 +1,43 @@
+dnl mpn_umul_ppmm -- 1x1->2 limb multiplication
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+defframe(PARAM_M2, 12)
+defframe(PARAM_M1, 8)
+defframe(PARAM_LOWPTR, 4)
+
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_umul_ppmm)
+deflit(`FRAME',0)
+ movl PARAM_LOWPTR, %ecx
+ movl PARAM_M1, %eax
+ mull PARAM_M2
+ movl %eax, (%ecx)
+ movl %edx, %eax
+ ret
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/x86-defs.m4 b/rts/gmp/mpn/x86/x86-defs.m4
new file mode 100644
index 0000000000..2dad698002
--- /dev/null
+++ b/rts/gmp/mpn/x86/x86-defs.m4
@@ -0,0 +1,713 @@
+divert(-1)
+
+dnl m4 macros for x86 assembler.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Notes:
+dnl
+dnl m4 isn't perfect for processing BSD style x86 assembler code, the main
+dnl problems are,
+dnl
+dnl 1. Doing define(foo,123) and then using foo in an addressing mode like
+dnl foo(%ebx) expands as a macro rather than a constant. This is worked
+dnl around by using deflit() from asm-defs.m4, instead of define().
+dnl
+dnl 2. Immediates in macro definitions need a space or `' to stop the $
+dnl looking like a macro parameter. For example,
+dnl
+dnl define(foo, `mov $ 123, %eax')
+dnl
+dnl This is only a problem in macro definitions, not in ordinary text,
+dnl nor in macro parameters like text passed to forloop() or ifdef().
+
+
+deflit(BYTES_PER_MP_LIMB, 4)
+
+
+dnl --------------------------------------------------------------------------
+dnl Replacement PROLOGUE/EPILOGUE with more sophisticated error checking.
+dnl Nesting and overlapping not allowed.
+dnl
+
+
+dnl Usage: PROLOGUE(functionname)
+dnl
+dnl Generate a function prologue. functionname gets GSYM_PREFIX added.
+dnl Examples,
+dnl
+dnl PROLOGUE(mpn_add_n)
+dnl PROLOGUE(somefun)
+
+define(`PROLOGUE',
+m4_assert_numargs(1)
+m4_assert_defined(`PROLOGUE_cpu')
+`ifdef(`PROLOGUE_current_function',
+`m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1)
+)')dnl
+m4_file_seen()dnl
+define(`PROLOGUE_current_function',`$1')dnl
+PROLOGUE_cpu(GSYM_PREFIX`'$1)')
+
+
+dnl Usage: EPILOGUE()
+dnl
+dnl Notice the function name is passed to EPILOGUE_cpu(), letting it use $1
+dnl instead of the long PROLOGUE_current_function symbol.
+
+define(`EPILOGUE',
+m4_assert_numargs(0)
+m4_assert_defined(`EPILOGUE_cpu')
+`ifdef(`PROLOGUE_current_function',,
+`m4_error(`EPILOGUE'() with no `PROLOGUE'()
+)')dnl
+EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl
+undefine(`PROLOGUE_current_function')')
+
+m4wrap_prepend(
+`ifdef(`PROLOGUE_current_function',
+`m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen
+')')')
+
+
+dnl Usage: PROLOGUE_assert_inside()
+dnl
+dnl Use this unquoted on a line on its own at the start of a macro
+dnl definition to add some code to check the macro is only used inside a
+dnl PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is
+dnl defined.
+
+define(PROLOGUE_assert_inside,
+m4_assert_numargs(0)
+``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '')
+
+define(PROLOGUE_assert_inside_internal,
+m4_assert_numargs(1)
+`ifdef(`PROLOGUE_current_function',,
+`m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair
+')')')
+
+
+dnl Usage: L(labelname)
+dnl LF(functionname,labelname)
+dnl
+dnl Generate a local label in the current or given function. For LF(),
+dnl functionname gets GSYM_PREFIX added, the same as with PROLOGUE().
+dnl
+dnl For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn),
+dnl
+dnl L(bar) => L__gmpn_add_n__bar
+dnl LF(somefun,bar) => Lsomefun__bar
+dnl
+dnl The funtion name and label name get two underscores between them rather
+dnl than one to guard against clashing with a separate external symbol that
+dnl happened to be called functionname_labelname. (Though this would only
+dnl happen if the local label prefix is is empty.) Underscores are used so
+dnl the whole label will still be a valid C identifier and so can be easily
+dnl used in gdb.
+
+dnl LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the
+dnl L macro and making an infinite recursion.
+define(LF,
+m4_assert_numargs(2)
+m4_assert_defined(`LSYM_PREFIX')
+`defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2')
+
+define(`L',
+m4_assert_numargs(1)
+PROLOGUE_assert_inside()
+`LF(PROLOGUE_current_function,`$1')')
+
+
+dnl Called: PROLOGUE_cpu(gsym)
+dnl EPILOGUE_cpu(gsym)
+
+define(PROLOGUE_cpu,
+m4_assert_numargs(1)
+ `GLOBL $1
+ TYPE($1,`function')
+$1:')
+
+define(EPILOGUE_cpu,
+m4_assert_numargs(1)
+` SIZE($1,.-$1)')
+
+
+
+dnl --------------------------------------------------------------------------
+dnl Various x86 macros.
+dnl
+
+
+dnl Usage: ALIGN_OFFSET(bytes,offset)
+dnl
+dnl Align to `offset' away from a multiple of `bytes'.
+dnl
+dnl This is useful for testing, for example align to something very strict
+dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
+dnl
+dnl Generally you wouldn't execute across the padding, but it's done with
+dnl nop's so it'll work.
+
+define(ALIGN_OFFSET,
+m4_assert_numargs(2)
+`ALIGN($1)
+forloop(`i',1,$2,` nop
+')')
+
+
+dnl Usage: defframe(name,offset)
+dnl
+dnl Make a definition like the following with which to access a parameter
+dnl or variable on the stack.
+dnl
+dnl define(name,`FRAME+offset(%esp)')
+dnl
+dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
+dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
+dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
+dnl zero offset is wanted.
+dnl
+dnl The new macro also gets a check that when it's used FRAME is actually
+dnl defined, and that the final %esp offset isn't negative, which would
+dnl mean an attempt to access something below the current %esp.
+dnl
+dnl deflit() is used rather than a plain define(), so the new macro won't
+dnl delete any following parenthesized expression. name(%edi) will come
+dnl out say as 16(%esp)(%edi). This isn't valid assembler and should
+dnl provoke an error, which is better than silently giving just 16(%esp).
+dnl
+dnl See README.family for more on the suggested way to access the stack
+dnl frame.
+
+define(defframe,
+m4_assert_numargs(2)
+`deflit(`$1',
+m4_assert_defined(`FRAME')
+`defframe_check_notbelow(`$1',$2,FRAME)dnl
+defframe_empty_if_zero(FRAME+($2))(%esp)')')
+
+dnl Called: defframe_empty_if_zero(expression)
+define(defframe_empty_if_zero,
+`ifelse(defframe_empty_if_zero_disabled,1,
+`eval($1)',
+`m4_empty_if_zero($1)')')
+
+dnl Called: defframe_check_notbelow(`name',offset,FRAME)
+define(defframe_check_notbelow,
+m4_assert_numargs(3)
+`ifelse(eval(($3)+($2)<0),1,
+`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
+')')')
+
+
+dnl Usage: FRAME_pushl()
+dnl FRAME_popl()
+dnl FRAME_addl_esp(n)
+dnl FRAME_subl_esp(n)
+dnl
+dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
+dnl %esp of n bytes.
+dnl
+dnl Using these macros is completely optional. Sometimes it makes more
+dnl sense to put explicit deflit(`FRAME',N) forms, especially when there's
+dnl jumps and different sequences of FRAME values need to be used in
+dnl different places.
+
+define(FRAME_pushl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+4))')
+
+define(FRAME_popl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-4))')
+
+define(FRAME_addl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-($1)))')
+
+define(FRAME_subl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+($1)))')
+
+
+dnl Usage: defframe_pushl(name)
+dnl
+dnl Do a combination of a FRAME_pushl() and a defframe() to name the stack
+dnl location just pushed. This should come after a pushl instruction.
+dnl Putting it on the same line works and avoids lengthening the code. For
+dnl example,
+dnl
+dnl pushl %eax defframe_pushl(VAR_COUNTER)
+dnl
+dnl Notice the defframe() is done with an unquoted -FRAME thus giving its
+dnl current value without tracking future changes.
+
+define(defframe_pushl,
+`FRAME_pushl()defframe(`$1',-FRAME)')
+
+
+dnl --------------------------------------------------------------------------
+dnl Assembler instruction macros.
+dnl
+
+
+dnl Usage: emms_or_femms
+dnl femms_available_p
+dnl
+dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
+dnl femms instruction is available. emms_or_femms expands to femms if
+dnl available, or emms if not.
+dnl
+dnl emms_or_femms is meant for use in the K6 directory where plain K6
+dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
+dnl supported together.
+dnl
+dnl On K7 femms is no longer faster and is just an alias for emms, so plain
+dnl emms may as well be used.
+
+define(femms_available_p,
+m4_assert_numargs(-1)
+`m4_ifdef_anyof_p(
+ `HAVE_TARGET_CPU_k62',
+ `HAVE_TARGET_CPU_k63',
+ `HAVE_TARGET_CPU_athlon')')
+
+define(emms_or_femms,
+m4_assert_numargs(-1)
+`ifelse(femms_available_p,1,`femms',`emms')')
+
+
+dnl Usage: femms
+dnl
+dnl The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the
+dnl following is a replacement using .byte.
+dnl
+dnl If femms isn't available, an emms is generated instead, for convenience
+dnl when testing on a machine without femms.
+
+define(femms,
+m4_assert_numargs(-1)
+`ifelse(femms_available_p,1,
+`.byte 15,14 C AMD 3DNow femms',
+`emms`'dnl
+m4_warning(`warning, using emms in place of femms, use for testing only
+')')')
+
+
+dnl Usage: jadcl0(op)
+dnl
+dnl Issue a jnc/incl as a substitute for adcl $0,op. This isn't an exact
+dnl replacement, since it doesn't set the flags like adcl does.
+dnl
+dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
+dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch
+dnl misprediction penalty is small, and the multiply algorithm used leads
+dnl to a carry bit on average only 1/4 of the time.
+dnl
+dnl jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for
+dnl comparison. For example,
+dnl
+dnl define(`jadcl0_disabled',1)
+dnl
+dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
+dnl the same size as an adcl. This makes it possible to use the exact same
+dnl computed jump code when testing the relative speed of jnc/incl and adcl
+dnl with jadcl0_disabled.
+
+define(jadcl0,
+m4_assert_numargs(1)
+`ifelse(jadcl0_disabled,1,
+ `adcl $`'0, $1',
+ `jnc 1f
+ incl $1
+1:dnl')')
+
+
+dnl Usage: cmov_available_p
+dnl
+dnl Expand to 1 if cmov is available, 0 if not.
+
+define(cmov_available_p,
+`m4_ifdef_anyof_p(
+ `HAVE_TARGET_CPU_pentiumpro',
+ `HAVE_TARGET_CPU_pentium2',
+ `HAVE_TARGET_CPU_pentium3',
+ `HAVE_TARGET_CPU_athlon')')
+
+
+dnl Usage: x86_lookup(target, key,value, key,value, ...)
+dnl x86_lookup_p(target, key,value, key,value, ...)
+dnl
+dnl Look for `target' among the `key' parameters.
+dnl
+dnl x86_lookup expands to the corresponding `value', or generates an error
+dnl if `target' isn't found.
+dnl
+dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not.
+
+define(x86_lookup,
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+define(x86_lookup_p,
+`ifelse(eval($#<3),1, `0',
+`ifelse(`$1',`$2', `1',
+`x86_lookup_p(`$1',shift(shift(shift($@))))')')')
+
+
+dnl Usage: x86_opcode_reg32(reg)
+dnl x86_opcode_reg32_p(reg)
+dnl
+dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given
+dnl 32-bit register, eg. `%ebp' turns into 5.
+dnl
+dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
+dnl if not.
+
+define(x86_opcode_reg32,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_p,
+m4_assert_onearg()
+`x86_lookup_p(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_list,
+``%eax',0,
+`%ecx',1,
+`%edx',2,
+`%ebx',3,
+`%esp',4,
+`%ebp',5,
+`%esi',6,
+`%edi',7')
+
+
+dnl Usage: x86_opcode_tttn(cond)
+dnl
+dnl Expand to the 4-bit "tttn" field value for the given x86 branch
+dnl condition (like `c', `ae', etc).
+
+define(x86_opcode_tttn,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_ttn_list)')
+
+define(x86_opcode_tttn_list,
+``o', 0,
+`no', 1,
+`b', 2, `c', 2, `nae',2,
+`nb', 3, `nc', 3, `ae', 3,
+`e', 4, `z', 4,
+`ne', 5, `nz', 5,
+`be', 6, `na', 6,
+`nbe', 7, `a', 7,
+`s', 8,
+`ns', 9,
+`p', 10, `pe', 10, `npo',10,
+`np', 11, `npe',11, `po', 11,
+`l', 12, `nge',12,
+`nl', 13, `ge', 13,
+`le', 14, `ng', 14,
+`nle',15, `g', 15')
+
+
+dnl Usage: cmovCC(srcreg,dstreg)
+dnl
+dnl Generate a cmov instruction if the target supports cmov, or simulate it
+dnl with a conditional jump if not (the latter being meant only for
+dnl testing). For example,
+dnl
+dnl cmovz( %eax, %ebx)
+dnl
+dnl cmov instructions are generated using .byte sequences, since only
+dnl recent versions of gas know cmov.
+dnl
+dnl The source operand can only be a plain register. (m4 code implementing
+dnl full memory addressing modes exists, believe it or not, but isn't
+dnl currently needed and isn't included.)
+dnl
+dnl All the standard conditions are defined. Attempting to use one without
+dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
+dnl an error. This ensures the necessary .byte sequences aren't
+dnl accidentally missed.
+
+dnl Called: define_cmov_many(cond,tttn,cond,tttn,...)
+define(define_cmov_many,
+`ifelse(m4_length(`$1'),0,,
+`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
+
+dnl Called: define_cmov(cond,tttn)
+define(define_cmov,
+m4_assert_numargs(2)
+`define(`cmov$1',
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`cmov_internal'(m4_doublequote($`'0),``$1',`$2'',dnl
+m4_doublequote($`'1),m4_doublequote($`'2)))')
+
+define_cmov_many(x86_opcode_tttn_list)
+
+
+dnl Called: cmov_internal(name,cond,tttn,src,dst)
+define(cmov_internal,
+m4_assert_numargs(5)
+`ifelse(cmov_available_p,1,
+`cmov_bytes_tttn(`$1',`$3',`$4',`$5')',
+`m4_warning(`warning, simulating cmov with jump, use for testing only
+')cmov_simulate(`$2',`$4',`$5')')')
+
+dnl Called: cmov_simulate(cond,src,dst)
+dnl If this is going to be used with memory operands for the source it will
+dnl need to be changed to do a fetch even if the condition is false, so as
+dnl to trigger exceptions the same way a real cmov does.
+define(cmov_simulate,
+m4_assert_numargs(3)
+ `j$1 1f C cmov$1 $2, $3
+ jmp 2f
+1: movl $2, $3
+2:')
+
+dnl Called: cmov_bytes_tttn(name,tttn,src,dst)
+define(cmov_bytes_tttn,
+m4_assert_numargs(4)
+`.byte dnl
+15, dnl
+eval(64+$2), dnl
+eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
+ C `$1 $3, $4'')
+
+
+dnl Usage: loop_or_decljnz label
+dnl
+dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever
+dnl is better. "loop" is better on K6 and probably on 386, on other chips
+dnl separate decl/jnz is better.
+dnl
+dnl This macro is just for mpn/x86/divrem_1.asm and mpn/x86/mod_1.asm where
+dnl this loop_or_decljnz variation is enough to let the code be shared by
+dnl all chips.
+
+define(loop_or_decljnz,
+`ifelse(loop_is_better_p,1,
+ `loop',
+ `decl %ecx
+ jnz')')
+
+define(loop_is_better_p,
+`m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6',
+ `HAVE_TARGET_CPU_k62',
+ `HAVE_TARGET_CPU_k63',
+ `HAVE_TARGET_CPU_i386')')
+
+
+dnl Usage: Zdisp(inst,op,op,op)
+dnl
+dnl Generate explicit .byte sequences if necessary to force a byte-sized
+dnl zero displacement on an instruction. For example,
+dnl
+dnl Zdisp( movl, 0,(%esi), %eax)
+dnl
+dnl expands to
+dnl
+dnl .byte 139,70,0 C movl 0(%esi), %eax
+dnl
+dnl If the displacement given isn't 0, then normal assembler code is
+dnl generated. For example,
+dnl
+dnl Zdisp( movl, 4,(%esi), %eax)
+dnl
+dnl expands to
+dnl
+dnl movl 4(%esi), %eax
+dnl
+dnl This means a single Zdisp() form can be used with an expression for the
+dnl displacement, and .byte will be used only if necessary. The
+dnl displacement argument is eval()ed.
+dnl
+dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is
+dnl implemented with a table of instructions and encodings. A new entry is
+dnl needed for any different operation or registers.
+
+define(Zdisp,
+`define(`Zdisp_found',0)dnl
+Zdisp_match( movl, %eax, 0,(%edi), `137,71,0', $@)`'dnl
+Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0', $@)`'dnl
+Zdisp_match( movl, %esi, 0,(%edi), `137,119,0', $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0', $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0', $@)`'dnl
+Zdisp_match( movl, 0,(%esi), %eax, `139,70,0', $@)`'dnl
+Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
+Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0', $@)`'dnl
+Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0', $@)`'dnl
+Zdisp_match( addl, %esi, 0,(%edi), `1,119,0', $@)`'dnl
+Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0', $@)`'dnl
+Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0', $@)`'dnl
+Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0', $@)`'dnl
+Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%esi), %mm0, `15,111,70,0', $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edi), `15,127,71,0', $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
+Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
+ifelse(Zdisp_found,0,
+`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
+')')')
+
+define(Zdisp_match,
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+ && m4_stringequal_p(`$2',0)
+ && m4_stringequal_p(`$3',`$8')
+ && m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$7'),0,
+` .byte $5 C `$1 0$3, $4'',
+` $6 $7$8, $9')',
+
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+ && m4_stringequal_p(`$2',`$7')
+ && m4_stringequal_p(`$3',0)
+ && m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$8'),0,
+` .byte $5 C `$1 $2, 0$4'',
+` $6 $7, $8$9')')')')
+
+
+dnl Usage: shldl(count,src,dst)
+dnl shrdl(count,src,dst)
+dnl shldw(count,src,dst)
+dnl shrdw(count,src,dst)
+dnl
+dnl Generate a double-shift instruction, possibly omitting a %cl count
+dnl parameter if that's what the assembler requires, as indicated by
+dnl WANT_SHLDL_CL in config.m4. For example,
+dnl
+dnl shldl( %cl, %eax, %ebx)
+dnl
+dnl turns into either
+dnl
+dnl shldl %cl, %eax, %ebx
+dnl or
+dnl shldl %eax, %ebx
+dnl
+dnl Immediate counts are always passed through unchanged. For example,
+dnl
+dnl shrdl( $2, %esi, %edi)
+dnl becomes
+dnl shrdl $2, %esi, %edi
+dnl
+dnl
+dnl If you forget to use the macro form "shldl( ...)" and instead write
+dnl just a plain "shldl ...", an error results. This ensures the necessary
+dnl variant treatment of %cl isn't accidentally bypassed.
+
+define(define_shd_instruction,
+`define($1,
+m4_instruction_wrapper()
+m4_assert_numargs(3)
+`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
+m4_doublequote($`'2),m4_doublequote($`'3)))')
+
+dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
+define_shd_instruction(shldl)
+define_shd_instruction(shrdl)
+define_shd_instruction(shldw)
+define_shd_instruction(shrdw)
+
+dnl Called: shd_instruction(op,count,src,dst)
+define(shd_instruction,
+m4_assert_numargs(4)
+m4_assert_defined(`WANT_SHLDL_CL')
+`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
+``$1' `$3', `$4'',
+``$1' `$2', `$3', `$4'')')
+
+
+dnl Usage: ASSERT(cond, instructions)
+dnl
+dnl If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl flags condition to then be satisfied. For example,
+dnl
+dnl ASSERT(ne, `cmpl %eax, %ebx')
+dnl
+dnl The instructions can be omitted to just assert a flags condition with
+dnl no extra calculation. For example,
+dnl
+dnl ASSERT(nc)
+dnl
+dnl When `instructions' is not empty, a pushf/popf is added to preserve the
+dnl flags, but the instructions themselves must preserve any registers that
+dnl matter. FRAME is adjusted for the push and pop, so the instructions
+dnl given can use defframe() stack variables.
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+`ifelse(WANT_ASSERT,1,
+ `C ASSERT
+ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
+ $2
+ j`$1' 1f
+ ud2 C assertion failed
+1:
+ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
+')')
+
+
+dnl Usage: movl_text_address(label,register)
+dnl
+dnl Get the address of a text segment label, using either a plain movl or a
+dnl position-independent calculation, as necessary. For example,
+dnl
+dnl movl_code_address(L(foo),%eax)
+dnl
+dnl This macro is only meant for use in ASSERT()s or when testing, since
+dnl the PIC sequence it generates will want to be done with a ret balancing
+dnl the call on CPUs with return address branch predition.
+dnl
+dnl The addl generated here has a backward reference to 1b, and so won't
+dnl suffer from the two forwards references bug in old gas (described in
+dnl mpn/x86/README.family).
+
+define(movl_text_address,
+`ifdef(`PIC',
+ `call 1f
+1: popl $2 C %eip
+ addl `$'$1-1b, $2',
+ `movl `$'$1, $2')')
+
+
+divert`'dnl
diff --git a/rts/gmp/mpn/z8000/add_n.s b/rts/gmp/mpn/z8000/add_n.s
new file mode 100644
index 0000000000..3a136107fe
--- /dev/null
+++ b/rts/gmp/mpn/z8000/add_n.s
@@ -0,0 +1,53 @@
+! Z8000 __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ unseg
+ .text
+ even
+ global ___gmpn_add_n
+___gmpn_add_n:
+ pop r0,@r6
+ pop r1,@r5
+ add r0,r1
+ ld @r7,r0
+ dec r4
+ jr eq,Lend
+Loop: pop r0,@r6
+ pop r1,@r5
+ adc r0,r1
+ inc r7,#2
+ ld @r7,r0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpn/z8000/gmp-mparam.h b/rts/gmp/mpn/z8000/gmp-mparam.h
new file mode 100644
index 0000000000..4216df673c
--- /dev/null
+++ b/rts/gmp/mpn/z8000/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 16
+#define BYTES_PER_MP_LIMB 2
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 16
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/rts/gmp/mpn/z8000/mul_1.s b/rts/gmp/mpn/z8000/mul_1.s
new file mode 100644
index 0000000000..20fadd340a
--- /dev/null
+++ b/rts/gmp/mpn/z8000/mul_1.s
@@ -0,0 +1,68 @@
+! Z8000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! size r5
+! s2_limb r4
+
+ unseg
+ .text
+ even
+ global ___gmpn_mul_1
+___gmpn_mul_1:
+ sub r2,r2 ! zero carry limb
+ and r4,r4
+ jr mi,Lneg
+
+Lpos: pop r1,@r6
+ ld r9,r1
+ mult rr8,r4
+ and r1,r1 ! shift msb of loaded limb into cy
+ jr mi,Lp ! branch if loaded limb's msb is set
+ add r8,r4 ! hi_limb += sign_comp2
+Lp: add r9,r2 ! lo_limb += cy_limb
+ xor r2,r2
+ adc r2,r8
+ ld @r7,r9
+ inc r7,#2
+ dec r5
+ jr ne,Lpos
+ ret t
+
+Lneg: pop r1,@r6
+ ld r9,r1
+ mult rr8,r4
+ add r8,r1 ! hi_limb += sign_comp1
+ and r1,r1
+ jr mi,Ln
+ add r8,r4 ! hi_limb += sign_comp2
+Ln: add r9,r2 ! lo_limb += cy_limb
+ xor r2,r2
+ adc r2,r8
+ ld @r7,r9
+ inc r7,#2
+ dec r5
+ jr ne,Lneg
+ ret t
diff --git a/rts/gmp/mpn/z8000/sub_n.s b/rts/gmp/mpn/z8000/sub_n.s
new file mode 100644
index 0000000000..bd9a7ad409
--- /dev/null
+++ b/rts/gmp/mpn/z8000/sub_n.s
@@ -0,0 +1,54 @@
+! Z8000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ unseg
+ .text
+ even
+ global ___gmpn_sub_n
+___gmpn_sub_n:
+ pop r0,@r6
+ pop r1,@r5
+ sub r0,r1
+ ld @r7,r0
+ dec r4
+ jr eq,Lend
+Loop: pop r0,@r6
+ pop r1,@r5
+ sbc r0,r1
+ inc r7,#2
+ ld @r7,r0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpn/z8000x/add_n.s b/rts/gmp/mpn/z8000x/add_n.s
new file mode 100644
index 0000000000..7f130785c5
--- /dev/null
+++ b/rts/gmp/mpn/z8000x/add_n.s
@@ -0,0 +1,56 @@
+! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal,
+! non-zero length.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ segm
+ .text
+ even
+ global ___gmpn_add_n
+___gmpn_add_n:
+ popl rr0,@r6
+ popl rr8,@r5
+ addl rr0,rr8
+ ldl @r7,rr0
+ dec r4
+ jr eq,Lend
+Loop: popl rr0,@r6
+ popl rr8,@r5
+ adc r1,r9
+ adc r0,r8
+ inc r7,#4
+ ldl @r7,rr0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ ld r3,r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpn/z8000x/sub_n.s b/rts/gmp/mpn/z8000x/sub_n.s
new file mode 100644
index 0000000000..f416d1d6eb
--- /dev/null
+++ b/rts/gmp/mpn/z8000x/sub_n.s
@@ -0,0 +1,56 @@
+! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the
+! same length > 0 and store difference in a third limb vector.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ segm
+ .text
+ even
+ global ___gmpn_sub_n
+___gmpn_sub_n:
+ popl rr0,@r6
+ popl rr8,@r5
+ subl rr0,rr8
+ ldl @r7,rr0
+ dec r4
+ jr eq,Lend
+Loop: popl rr0,@r6
+ popl rr8,@r5
+ sbc r1,r9
+ sbc r0,r8
+ inc r7,#4
+ ldl @r7,rr0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ ld r3,r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpz/Makefile.am b/rts/gmp/mpz/Makefile.am
new file mode 100644
index 0000000000..cd6fec4e21
--- /dev/null
+++ b/rts/gmp/mpz/Makefile.am
@@ -0,0 +1,58 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+
+SUBDIRS = tests
+
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+noinst_LTLIBRARIES = libmpz.la
+libmpz_la_SOURCES = \
+ abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \
+ bin_ui.c bin_uiui.c cdiv_q.c \
+ cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
+ clear.c clrbit.c cmp.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_ui.c com.c \
+ divexact.c dump.c fac_ui.c fdiv_q.c fdiv_q_2exp.c fdiv_q_ui.c \
+ fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_2exp.c fdiv_r_ui.c fdiv_ui.c \
+ fib_ui.c fits_sint_p.c fits_slong_p.c fits_sshort_p.c fits_uint_p.c \
+ fits_ulong_p.c fits_ushort_p.c gcd.c gcd_ui.c gcdext.c get_d.c get_si.c \
+ get_str.c get_ui.c getlimbn.c hamdist.c init.c inp_raw.c inp_str.c \
+ invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
+ jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
+ lcm.c legendre.c mod.c mul.c mul_2exp.c neg.c nextprime.c \
+ out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
+ powm_ui.c pprime_p.c random.c random2.c realloc.c remove.c root.c rrandomb.c \
+ scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
+ set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \
+ sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
+ tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
+ urandomb.c urandomm.c xor.c
+
+EXTRA_DIST = mul_siui.c
+nodist_libmpz_la_SOURCES = mul_si.c mul_ui.c
+CLEANFILES = $(nodist_libmpz_la_SOURCES)
+
+mul_si.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_si.c
+mul_ui.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_ui.c
diff --git a/rts/gmp/mpz/Makefile.in b/rts/gmp/mpz/Makefile.in
new file mode 100644
index 0000000000..e0f2cdc133
--- /dev/null
+++ b/rts/gmp/mpz/Makefile.in
@@ -0,0 +1,457 @@
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+
+@SET_MAKE@
+build_alias = @build_alias@
+build_triplet = @build@
+host_alias = @host_alias@
+host_triplet = @host@
+target_alias = @target_alias@
+target_triplet = @target@
+AMDEP = @AMDEP@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+EXEEXT = @EXEEXT@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+gmp_srclinks = @gmp_srclinks@
+install_sh = @install_sh@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+
+SUBDIRS =
+
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+noinst_LTLIBRARIES = libmpz.la
+libmpz_la_SOURCES = \
+ abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \
+ bin_ui.c bin_uiui.c cdiv_q.c \
+ cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
+ clear.c clrbit.c cmp.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_ui.c com.c \
+ divexact.c dump.c fac_ui.c fdiv_q.c fdiv_q_2exp.c fdiv_q_ui.c \
+ fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_2exp.c fdiv_r_ui.c fdiv_ui.c \
+ fib_ui.c fits_sint_p.c fits_slong_p.c fits_sshort_p.c fits_uint_p.c \
+ fits_ulong_p.c fits_ushort_p.c gcd.c gcd_ui.c gcdext.c get_d.c get_si.c \
+ get_str.c get_ui.c getlimbn.c hamdist.c init.c inp_raw.c inp_str.c \
+ invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
+ jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
+ lcm.c legendre.c mod.c mul.c mul_2exp.c neg.c nextprime.c \
+ out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
+ powm_ui.c pprime_p.c random.c random2.c realloc.c remove.c root.c rrandomb.c \
+ scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
+ set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \
+ sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
+ tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
+ urandomb.c urandomm.c xor.c
+
+
+EXTRA_DIST = mul_siui.c
+nodist_libmpz_la_SOURCES = mul_si.c mul_ui.c
+CLEANFILES = $(nodist_libmpz_la_SOURCES)
+subdir = mpz
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = ../config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I..
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+libmpz_la_LDFLAGS =
+libmpz_la_LIBADD =
+am_libmpz_la_OBJECTS = abs.lo add.lo add_ui.lo addmul_ui.lo and.lo \
+array_init.lo bin_ui.lo bin_uiui.lo cdiv_q.lo cdiv_q_ui.lo cdiv_qr.lo \
+cdiv_qr_ui.lo cdiv_r.lo cdiv_r_ui.lo cdiv_ui.lo clear.lo clrbit.lo \
+cmp.lo cmp_si.lo cmp_ui.lo cmpabs.lo cmpabs_ui.lo com.lo divexact.lo \
+dump.lo fac_ui.lo fdiv_q.lo fdiv_q_2exp.lo fdiv_q_ui.lo fdiv_qr.lo \
+fdiv_qr_ui.lo fdiv_r.lo fdiv_r_2exp.lo fdiv_r_ui.lo fdiv_ui.lo \
+fib_ui.lo fits_sint_p.lo fits_slong_p.lo fits_sshort_p.lo \
+fits_uint_p.lo fits_ulong_p.lo fits_ushort_p.lo gcd.lo gcd_ui.lo \
+gcdext.lo get_d.lo get_si.lo get_str.lo get_ui.lo getlimbn.lo \
+hamdist.lo init.lo inp_raw.lo inp_str.lo invert.lo ior.lo iset.lo \
+iset_d.lo iset_si.lo iset_str.lo iset_ui.lo jacobi.lo kronsz.lo \
+kronuz.lo kronzs.lo kronzu.lo lcm.lo legendre.lo mod.lo mul.lo \
+mul_2exp.lo neg.lo nextprime.lo out_raw.lo out_str.lo perfpow.lo \
+perfsqr.lo popcount.lo pow_ui.lo powm.lo powm_ui.lo pprime_p.lo \
+random.lo random2.lo realloc.lo remove.lo root.lo rrandomb.lo scan0.lo \
+scan1.lo set.lo set_d.lo set_f.lo set_q.lo set_si.lo set_str.lo \
+set_ui.lo setbit.lo size.lo sizeinbase.lo sqrt.lo sqrtrem.lo sub.lo \
+sub_ui.lo swap.lo tdiv_ui.lo tdiv_q.lo tdiv_q_2exp.lo tdiv_q_ui.lo \
+tdiv_qr.lo tdiv_qr_ui.lo tdiv_r.lo tdiv_r_2exp.lo tdiv_r_ui.lo \
+tstbit.lo ui_pow_ui.lo urandomb.lo urandomm.lo xor.lo
+nodist_libmpz_la_OBJECTS = mul_si.lo mul_ui.lo
+libmpz_la_OBJECTS = $(am_libmpz_la_OBJECTS) $(nodist_libmpz_la_OBJECTS)
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CFLAGS = @CFLAGS@
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libmpz_la_SOURCES)
+DIST_COMMON = README Makefile.am Makefile.in
+
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+GZIP_ENV = --best
+depcomp =
+SOURCES = $(libmpz_la_SOURCES) $(nodist_libmpz_la_SOURCES)
+OBJECTS = $(am_libmpz_la_OBJECTS) $(nodist_libmpz_la_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu mpz/Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+mostlyclean-noinstLTLIBRARIES:
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+
+distclean-noinstLTLIBRARIES:
+
+maintainer-clean-noinstLTLIBRARIES:
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+ -rm -f *.$(OBJEXT)
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+
+maintainer-clean-libtool:
+
+libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES)
+ $(LINK) $(libmpz_la_LDFLAGS) $(libmpz_la_OBJECTS) $(libmpz_la_LIBADD) $(LIBS)
+.c.o:
+ $(COMPILE) -c $<
+.c.obj:
+ $(COMPILE) -c `cygpath -w $<`
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+
+all-recursive install-data-recursive install-exec-recursive \
+installdirs-recursive install-recursive uninstall-recursive \
+check-recursive installcheck-recursive info-recursive dvi-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \
+ rev="$$subdir $$rev"; \
+ if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \
+ done; \
+ test "$$dot_seen" = "no" && rev=". $$rev"; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \
+ || etags $(ETAGS_ARGS) $$tags $$unique $(LISP)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+
+distdir: $(DISTFILES)
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pR $$d/$$file $(distdir); \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+ for subdir in $(SUBDIRS); do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(top_distdir) distdir=../$(distdir)/$$subdir distdir) \
+ || exit 1; \
+ fi; \
+ done
+info-am:
+info: info-recursive
+dvi-am:
+dvi: dvi-recursive
+check-am: all-am
+check: check-recursive
+installcheck-am:
+installcheck: installcheck-recursive
+install-exec-am:
+install-exec: install-exec-recursive
+
+install-data-am:
+install-data: install-data-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-recursive
+uninstall-am:
+uninstall: uninstall-recursive
+all-am: Makefile $(LTLIBRARIES)
+all-redirect: all-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
+installdirs: installdirs-recursive
+installdirs-am:
+
+
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+
+maintainer-clean-generic:
+ -rm -f Makefile.in
+mostlyclean-am: mostlyclean-noinstLTLIBRARIES mostlyclean-compile \
+ mostlyclean-libtool mostlyclean-tags \
+ mostlyclean-generic
+
+mostlyclean: mostlyclean-recursive
+
+clean-am: clean-noinstLTLIBRARIES clean-compile clean-libtool \
+ clean-tags clean-generic mostlyclean-am
+
+clean: clean-recursive
+
+distclean-am: distclean-noinstLTLIBRARIES distclean-compile \
+ distclean-libtool distclean-tags distclean-generic \
+ clean-am
+ -rm -f libtool
+
+distclean: distclean-recursive
+
+maintainer-clean-am: maintainer-clean-noinstLTLIBRARIES \
+ maintainer-clean-compile maintainer-clean-libtool \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-recursive
+
+.PHONY: mostlyclean-noinstLTLIBRARIES distclean-noinstLTLIBRARIES \
+clean-noinstLTLIBRARIES maintainer-clean-noinstLTLIBRARIES \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile mostlyclean-libtool distclean-libtool \
+clean-libtool maintainer-clean-libtool install-recursive \
+uninstall-recursive install-data-recursive uninstall-data-recursive \
+install-exec-recursive uninstall-exec-recursive installdirs-recursive \
+uninstalldirs-recursive all-recursive check-recursive \
+installcheck-recursive info-recursive dvi-recursive \
+mostlyclean-recursive distclean-recursive clean-recursive \
+maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
+distclean-tags clean-tags maintainer-clean-tags distdir info-am info \
+dvi-am dvi check check-am installcheck-am installcheck install-exec-am \
+install-exec install-data-am install-data install-am install \
+uninstall-am uninstall all-redirect all-am all install-strip \
+installdirs-am installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+mul_si.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_si.c
+mul_ui.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_ui.c
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rts/gmp/mpz/README b/rts/gmp/mpz/README
new file mode 100644
index 0000000000..06b481d770
--- /dev/null
+++ b/rts/gmp/mpz/README
@@ -0,0 +1,23 @@
+This directory contains functions for GMP's integer function layer.
+
+In this version of GMP, integers are represented like in the figure below.
+(Please note that the format might change between every version, and that
+depending on the internal format in any way is a bad idea.)
+
+ most least
+significant significant
+ limb limb
+
+ _mp_d
+ /
+ /
+ \/
+ ____ ____ ____ ____ ____
+ |____|____|____|____|____|
+
+ <------- _mp_size ------->
+
+
+The most significant limb will be non-zero. The _mp_size field's sign
+reflects the sign of the number. Its absolute value is the count of limbs
+in the number.
diff --git a/rts/gmp/mpz/abs.c b/rts/gmp/mpz/abs.c
new file mode 100644
index 0000000000..0b5eab1ce6
--- /dev/null
+++ b/rts/gmp/mpz/abs.c
@@ -0,0 +1,51 @@
+/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_abs (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_abs (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t size;
+
+ size = ABS (u->_mp_size);
+
+ if (u != w)
+ {
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ }
+
+ w->_mp_size = size;
+}
diff --git a/rts/gmp/mpz/add.c b/rts/gmp/mpz/add.c
new file mode 100644
index 0000000000..a22c3778fb
--- /dev/null
+++ b/rts/gmp/mpz/add.c
@@ -0,0 +1,123 @@
+/* mpz_add -- Add two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_add (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_add (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+madd (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#else
+madd (u, v, w)
+ mpz_srcptr u;
+ mpz_srcptr v;
+ mpz_ptr w;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_srcptr up, vp;
+ mp_ptr wp;
+ mp_size_t usize, vsize, wsize;
+ mp_size_t abs_usize;
+ mp_size_t abs_vsize;
+
+ usize = u->_mp_size;
+ vsize = v->_mp_size;
+ abs_usize = ABS (usize);
+ abs_vsize = ABS (vsize);
+
+ if (abs_usize < abs_vsize)
+ {
+ /* Swap U and V. */
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (usize, vsize);
+ MP_SIZE_T_SWAP (abs_usize, abs_vsize);
+ }
+
+ /* True: ABS_USIZE >= ABS_VSIZE. */
+
+ /* If not space for w (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (u or v may be the same as w). */
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ wp = w->_mp_d;
+
+ if ((usize ^ vsize) < 0)
+ {
+ /* U and V have different sign. Need to compare them to determine
+ which operand to subtract from which. */
+
+ /* This test is right since ABS_USIZE >= ABS_VSIZE. */
+ if (abs_usize != abs_vsize)
+ {
+ mpn_sub (wp, up, abs_usize, vp, abs_vsize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ else if (mpn_cmp (up, vp, abs_usize) < 0)
+ {
+ mpn_sub_n (wp, vp, up, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize >= 0)
+ wsize = -wsize;
+ }
+ else
+ {
+ mpn_sub_n (wp, up, vp, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ }
+ else
+ {
+ /* U and V have same sign. Add them. */
+ mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize);
+ wp[abs_usize] = cy_limb;
+ wsize = abs_usize + cy_limb;
+ if (usize < 0)
+ wsize = -wsize;
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/add_ui.c b/rts/gmp/mpz/add_ui.c
new file mode 100644
index 0000000000..28dbd71f45
--- /dev/null
+++ b/rts/gmp/mpz/add_ui.c
@@ -0,0 +1,84 @@
+/* mpz_add_ui -- Add an mpz_t and an unsigned one-word integer.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_add_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_add_ui (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int v;
+#endif
+{
+ mp_srcptr up;
+ mp_ptr wp;
+ mp_size_t usize, wsize;
+ mp_size_t abs_usize;
+
+ usize = u->_mp_size;
+ abs_usize = ABS (usize);
+
+ /* If not space for W (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->_mp_d;
+ wp = w->_mp_d;
+
+ if (abs_usize == 0)
+ {
+ wp[0] = v;
+ w->_mp_size = v != 0;
+ return;
+ }
+
+ if (usize >= 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) v);
+ wp[abs_usize] = cy;
+ wsize = abs_usize + cy;
+ }
+ else
+ {
+ /* The signs are different. Need exact comparison to determine
+ which operand to subtract from which. */
+ if (abs_usize == 1 && up[0] < v)
+ {
+ wp[0] = v - up[0];
+ wsize = 1;
+ }
+ else
+ {
+ mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) v);
+ /* Size can decrease with at most one limb. */
+ wsize = -(abs_usize - (wp[abs_usize - 1] == 0));
+ }
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/addmul_ui.c b/rts/gmp/mpz/addmul_ui.c
new file mode 100644
index 0000000000..7b38d3624d
--- /dev/null
+++ b/rts/gmp/mpz/addmul_ui.c
@@ -0,0 +1,214 @@
+/* mpz_addmul_ui(prodsum, multiplier, small_multiplicand) --
+ Add MULTIPLICATOR times SMALL_MULTIPLICAND to PRODSUM.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static mp_limb_t mpn_neg1 _PROTO ((mp_ptr, mp_size_t));
+
+#if 0
+#undef MPN_NORMALIZE
+#define MPN_NORMALIZE(DST, NLIMBS) \
+ do { \
+ while (--(NLIMBS) >= 0 && (DST)[NLIMBS] == 0) \
+ ; \
+ (NLIMBS)++; \
+ } while (0)
+#undef MPN_NORMALIZE_NOT_ZERO
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
+ do { \
+ while ((DST)[--(NLIMBS)] == 0) \
+ ; \
+ (NLIMBS)++; \
+ } while (0)
+#endif
+
+void
+#if __STDC__
+mpz_addmul_ui (mpz_ptr rz, mpz_srcptr az, unsigned long int bu)
+#else
+mpz_addmul_ui (rz, az, bu)
+ mpz_ptr rz;
+ mpz_srcptr az;
+ unsigned long int bu;
+#endif
+{
+ mp_size_t rn, an;
+ mp_ptr rp, ap;
+
+ an = SIZ (az);
+
+ /* If either multiplier is zero, result is unaffected. */
+ if (bu == 0 || an == 0)
+ return;
+
+ rn = SIZ (rz);
+
+ if (rn == 0)
+ {
+ mp_limb_t cy;
+
+ an = ABS (an);
+ if (ALLOC (rz) <= an)
+ _mpz_realloc (rz, an + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_mul_1 (rp, ap, an, (mp_limb_t) bu);
+ rp[an] = cy;
+ an += cy != 0;
+ SIZ (rz) = SIZ (az) >= 0 ? an : -an;
+ return;
+ }
+
+ if ((an ^ rn) >= 0)
+ {
+ /* Sign of operands are the same--really add. */
+ an = ABS (an);
+ rn = ABS (rn);
+ if (rn > an)
+ {
+ mp_limb_t cy;
+ if (ALLOC (rz) <= rn)
+ _mpz_realloc (rz, rn + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_addmul_1 (rp, ap, an, (mp_limb_t) bu);
+ cy = mpn_add_1 (rp + an, rp + an, rn - an, cy);
+ rp[rn] = cy;
+ rn += cy != 0;
+ SIZ (rz) = SIZ (rz) >= 0 ? rn : -rn;
+ return;
+ }
+ else
+ {
+ mp_limb_t cy;
+ if (ALLOC (rz) <= an)
+ _mpz_realloc (rz, an + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_addmul_1 (rp, ap, rn, (mp_limb_t) bu);
+ if (an != rn)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_mul_1 (rp + rn, ap + rn, an - rn, (mp_limb_t) bu);
+ cy = cy2 + mpn_add_1 (rp + rn, rp + rn, an - rn, cy);
+ }
+ rn = an;
+ rp[rn] = cy;
+ rn += cy != 0;
+ SIZ (rz) = SIZ (rz) >= 0 ? rn : -rn;
+ return;
+ }
+ }
+ else
+ {
+ /* Sign of operands are different--actually subtract. */
+ an = ABS (an);
+ rn = ABS (rn);
+ if (rn > an)
+ {
+ mp_limb_t cy;
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_submul_1 (rp, ap, an, (mp_limb_t) bu);
+ cy = mpn_sub_1 (rp + an, rp + an, rn - an, cy);
+ if (cy != 0)
+ {
+ mpn_neg1 (rp, rn);
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+ }
+ else
+ {
+ MPN_NORMALIZE (rp, rn);
+ rn = -rn;
+ }
+
+ SIZ (rz) = SIZ (rz) >= 0 ? -rn : rn;
+ return;
+ }
+ else
+ {
+ /* Tricky case. We need to subtract an operand that might be larger
+ than the minuend. To avoid allocating temporary space, we compute
+ a*b-r instead of r-a*b and then negate. */
+ mp_limb_t cy;
+ if (ALLOC (rz) <= an)
+ _mpz_realloc (rz, an + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_submul_1 (rp, ap, rn, (mp_limb_t) bu);
+ if (an != rn)
+ {
+ mp_limb_t cy2;
+ cy -= mpn_neg1 (rp, rn);
+ cy2 = mpn_mul_1 (rp + rn, ap + rn, an - rn, (mp_limb_t) bu);
+ if (cy == ~(mp_limb_t) 0)
+ cy = cy2 - mpn_sub_1 (rp + rn, rp + rn, an - rn, (mp_limb_t) 1);
+ else
+ cy = cy2 + mpn_add_1 (rp + rn, rp + rn, an - rn, cy);
+ rp[an] = cy;
+ rn = an + (cy != 0);
+ rn -= rp[rn - 1] == 0;
+ }
+ else if (cy != 0)
+ {
+ cy -= mpn_neg1 (rp, rn);
+ rp[an] = cy;
+ rn = an + 1;
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+ }
+ else
+ {
+ rn = an;
+ MPN_NORMALIZE (rp, rn);
+ rn = -rn;
+ }
+
+ SIZ (rz) = SIZ (rz) >= 0 ? -rn : rn;
+ return;
+ }
+ }
+}
+
+static mp_limb_t
+#if __STDC__
+mpn_neg1 (mp_ptr rp, mp_size_t rn)
+#else
+mpn_neg1 (rp, rn)
+ mp_ptr rp;
+ mp_size_t rn;
+#endif
+{
+ mp_size_t i;
+
+ while (rn != 0 && rp[0] == 0)
+ rp++, rn--;
+
+ if (rn != 0)
+ {
+ rp[0] = -rp[0];
+ for (i = 1; i < rn; i++)
+ rp[i] = ~rp[i];
+ return 1;
+ }
+ return 0;
+}
diff --git a/rts/gmp/mpz/and.c b/rts/gmp/mpz/and.c
new file mode 100644
index 0000000000..354e9455bf
--- /dev/null
+++ b/rts/gmp/mpz/and.c
@@ -0,0 +1,278 @@
+/* mpz_and -- Logical and.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+#else
+mpz_and (res, op1, op2)
+ mpz_ptr res;
+ mpz_srcptr op1;
+ mpz_srcptr op2;
+#endif
+{
+ mp_srcptr op1_ptr, op2_ptr;
+ mp_size_t op1_size, op2_size;
+ mp_ptr res_ptr;
+ mp_size_t res_size;
+ mp_size_t i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op1_size = op1->_mp_size;
+ op2_size = op2->_mp_size;
+
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+
+ if (op1_size >= 0)
+ {
+ if (op2_size >= 0)
+ {
+ res_size = MIN (op1_size, op2_size);
+ /* First loop finds the size of the result. */
+ for (i = res_size - 1; i >= 0; i--)
+ if ((op1_ptr[i] & op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+
+ /* Handle allocation, now then we know exactly how much space is
+ needed for the result. */
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ /* Second loop computes the real result. */
+ for (i = res_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+
+ res->_mp_size = res_size;
+ return;
+ }
+ else /* op2_size < 0 */
+ {
+ /* Fall through to the code at the end of the function. */
+ }
+ }
+ else
+ {
+ if (op2_size < 0)
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+ mp_size_t res_alloc;
+
+ /* Both operands are negative, so will be the result.
+ -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =
+ = ~(~(OP1 - 1) & ~(OP2 - 1)) + 1 =
+ = ((OP1 - 1) | (OP2 - 1)) + 1 */
+
+ /* It might seem as we could end up with an (invalid) result with
+ a leading zero-limb here when one of the operands is of the
+ type 1,,0,,..,,.0. But some analysis shows that we surely
+ would get carry into the zero-limb in this situation... */
+
+ op1_size = -op1_size;
+ op2_size = -op2_size;
+
+ res_alloc = 1 + MAX (op1_size, op2_size);
+
+ opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+ op1_ptr = opx;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to
+ temporary space--never to the space RES->_mp_d used
+ to point to before reallocation. */
+ }
+
+ if (op1_size >= op2_size)
+ {
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+
+ res->_mp_size = -res_size;
+ TMP_FREE (marker);
+ return;
+ }
+ else
+ {
+ /* We should compute -OP1 & OP2. Swap OP1 and OP2 and fall
+ through to the code that handles OP1 & -OP2. */
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ }
+
+ }
+
+ {
+#if ANDNEW
+ mp_size_t op2_lim;
+ mp_size_t count;
+
+ /* OP2 must be negated as with infinite precision.
+
+ Scan from the low end for a non-zero limb. The first non-zero
+ limb is simply negated (two's complement). Any subsequent
+ limbs are one's complemented. Of course, we don't need to
+ handle more limbs than there are limbs in the other, positive
+ operand as the result for those limbs is going to become zero
+ anyway. */
+
+ /* Scan for the least significant non-zero OP2 limb, and zero the
+ result meanwhile for those limb positions. (We will surely
+ find a non-zero limb, so we can write the loop with one
+ termination condition only.) */
+ for (i = 0; op2_ptr[i] == 0; i++)
+ res_ptr[i] = 0;
+ op2_lim = i;
+
+ op2_size = -op2_size;
+
+ if (op1_size <= op2_size)
+ {
+ /* The ones-extended OP2 is >= than the zero-extended OP1.
+ RES_SIZE <= OP1_SIZE. Find the exact size. */
+ for (i = op1_size - 1; i > op2_lim; i--)
+ if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+ for (i = res_size - 1; i > op2_lim; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+ res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
+ /* Yes, this *can* happen! */
+ MPN_NORMALIZE (res_ptr, res_size);
+ }
+ else
+ {
+ /* The ones-extended OP2 is < than the zero-extended OP1.
+ RES_SIZE == OP1_SIZE, since OP1 is normalized. */
+ res_size = op1_size;
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+ for (i = op2_size - 1; i > op2_lim; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+ res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
+ }
+
+ res->_mp_size = res_size;
+#else
+
+ /* OP1 is positive and zero-extended,
+ OP2 is negative and ones-extended.
+ The result will be positive.
+ OP1 & -OP2 = OP1 & ~(OP2 - 1). */
+
+ mp_ptr opx;
+
+ op2_size = -op2_size;
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ if (op1_size > op2_size)
+ {
+ /* The result has the same size as OP1, since OP1 is normalized
+ and longer than the ones-extended OP2. */
+ res_size = op1_size;
+
+ /* Handle allocation, now then we know exactly how much space is
+ needed for the result. */
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ res_ptr = res->_mp_d;
+ op1_ptr = op1->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ res_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+
+ res->_mp_size = res_size;
+ }
+ else
+ {
+ /* Find out the exact result size. Ignore the high limbs of OP2,
+ OP1 is zero-extended and would make the result zero. */
+ for (i = op1_size - 1; i >= 0; i--)
+ if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+
+ /* Handle allocation, now then we know exactly how much space is
+ needed for the result. */
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ res_ptr = res->_mp_d;
+ op1_ptr = op1->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ for (i = res_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+
+ res->_mp_size = res_size;
+ }
+#endif
+ }
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/array_init.c b/rts/gmp/mpz/array_init.c
new file mode 100644
index 0000000000..1c22046986
--- /dev/null
+++ b/rts/gmp/mpz/array_init.c
@@ -0,0 +1,48 @@
+/* mpz_array_init (array, array_size, size_per_elem) --
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
+#else
+mpz_array_init (arr, arr_size, nbits)
+ mpz_ptr arr;
+ mp_size_t arr_size;
+ mp_size_t nbits;
+#endif
+{
+ register mp_ptr p;
+ register size_t i;
+ mp_size_t nlimbs;
+
+ nlimbs = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ p = (mp_ptr) (*_mp_allocate_func) (arr_size * nlimbs * BYTES_PER_MP_LIMB);
+
+ for (i = 0; i < arr_size; i++)
+ {
+ arr[i]._mp_alloc = nlimbs + 1; /* Yes, lie a little... */
+ arr[i]._mp_size = 0;
+ arr[i]._mp_d = p + i * nlimbs;
+ }
+}
diff --git a/rts/gmp/mpz/bin_ui.c b/rts/gmp/mpz/bin_ui.c
new file mode 100644
index 0000000000..a7a6c98218
--- /dev/null
+++ b/rts/gmp/mpz/bin_ui.c
@@ -0,0 +1,141 @@
+/* mpz_bin_uiui - compute n over k.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This is a poor implementation. Look at bin_uiui.c for improvement ideas.
+ In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
+ the code here only for big n.
+
+ The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
+ 1 section 1.2.6 part G. */
+
+
+/* Enhancement: use mpn_divexact_1 when it exists */
+#define DIVIDE() \
+ ASSERT (SIZ(r) > 0); \
+ ASSERT_NOCARRY (mpn_divrem_1 (PTR(r), (mp_size_t) 0, \
+ PTR(r), SIZ(r), kacc)); \
+ SIZ(r) -= (PTR(r)[SIZ(r)-1] == 0);
+
+void
+#if __STDC__
+mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
+#else
+mpz_bin_ui (r, n, k)
+ mpz_ptr r;
+ mpz_srcptr n;
+ unsigned long int k;
+#endif
+{
+ mpz_t ni;
+ mp_limb_t i;
+ mpz_t nacc;
+ mp_limb_t kacc;
+ mp_size_t negate;
+
+ if (mpz_sgn (n) < 0)
+ {
+ /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
+ mpz_init (ni);
+ mpz_neg (ni, n);
+ mpz_sub_ui (ni, ni, 1L);
+ negate = (k & 1); /* (-1)^k */
+ }
+ else
+ {
+ /* bin(n,k) == 0 if k>n
+ (no test for this under the n<0 case, since -n+k-1 >= k there) */
+ if (mpz_cmp_ui (n, k) < 0)
+ {
+ mpz_set_ui (r, 0L);
+ return;
+ }
+
+ /* set ni = n-k */
+ mpz_init (ni);
+ mpz_sub_ui (ni, n, k);
+ negate = 0;
+ }
+
+ /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
+ for positive, 1 for negative). */
+ mpz_set_ui (r, 1L);
+
+ /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's
+ whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
+ = ni, and new ni of ni+k-ni = k. */
+ if (mpz_cmp_ui (ni, k) < 0)
+ {
+ unsigned long tmp;
+ tmp = k;
+ k = mpz_get_ui (ni);
+ mpz_set_ui (ni, tmp);
+ }
+
+ kacc = 1;
+ mpz_init_set_ui (nacc, 1);
+
+ for (i = 1; i <= k; i++)
+ {
+ mp_limb_t k1, k0;
+
+#if 0
+ mp_limb_t nacclow;
+ int c;
+
+ nacclow = PTR(nacc)[0];
+ for (c = 0; (((kacc | nacclow) & 1) == 0); c++)
+ {
+ kacc >>= 1;
+ nacclow >>= 1;
+ }
+ mpz_div_2exp (nacc, nacc, c);
+#endif
+
+ mpz_add_ui (ni, ni, 1);
+ mpz_mul (nacc, nacc, ni);
+ umul_ppmm (k1, k0, kacc, i);
+ if (k1 != 0)
+ {
+ /* Accumulator overflow. Perform bignum step. */
+ mpz_mul (r, r, nacc);
+ mpz_set_ui (nacc, 1);
+ DIVIDE ();
+ kacc = i;
+ }
+ else
+ {
+ /* Save new products in accumulators to keep accumulating. */
+ kacc = k0;
+ }
+ }
+
+ mpz_mul (r, r, nacc);
+ DIVIDE ();
+ SIZ(r) = (SIZ(r) ^ -negate) + negate;
+
+ mpz_clear (nacc);
+ mpz_clear (ni);
+}
diff --git a/rts/gmp/mpz/bin_uiui.c b/rts/gmp/mpz/bin_uiui.c
new file mode 100644
index 0000000000..b37541ba54
--- /dev/null
+++ b/rts/gmp/mpz/bin_uiui.c
@@ -0,0 +1,120 @@
+/* mpz_bin_uiui - compute n over k.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Avoid reallocs by rounding up any new size */
+#define ROUNDUP_MASK 15
+
+/* Enhancement: use mpn_divexact_1 when it exists */
+#define MULDIV() \
+ MPZ_REALLOC (r, (SIZ(r)+1)|ROUNDUP_MASK); \
+ PTR(r)[SIZ(r)] = mpn_mul_1 (PTR(r), PTR(r), SIZ(r), nacc); \
+ ASSERT_NOCARRY (mpn_divrem_1 (PTR(r), (mp_size_t) 0, \
+ PTR(r), SIZ(r)+1, kacc)); \
+ SIZ(r) += (PTR(r)[SIZ(r)] != 0);
+
+void
+#if __STDC__
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+#else
+mpz_bin_uiui (r, n, k)
+ mpz_ptr r;
+ unsigned long int n;
+ unsigned long int k;
+#endif
+{
+ unsigned long int i, j;
+ mp_limb_t nacc, kacc;
+ unsigned long int cnt;
+
+ /* bin(n,k) = 0 if k>n. */
+ if (n < k)
+ {
+ mpz_set_ui (r, 0);
+ return;
+ }
+
+ /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+ k = MIN (k, n-k);
+
+ /* bin(n,0) = 1 */
+ if (k == 0)
+ {
+ mpz_set_ui (r, 1);
+ return;
+ }
+
+ j = n - k + 1;
+ mpz_set_ui (r, j);
+
+ /* Initialize accumulators. */
+ nacc = 1;
+ kacc = 1;
+
+ cnt = 0;
+ for (i = 2; i <= k; i++)
+ {
+ mp_limb_t n1, n0, k1, k0;
+
+ j++;
+#if 0
+ /* Remove common multiples of 2. This will allow us to accumulate
+ more in nacc and kacc before we need a bignum step. It would make
+ sense to cancel factors of 3, 5, etc too, but this would be best
+ handled by sieving out factors. Alternatively, we could perform a
+ gcd of the accumulators just as they have overflown, and keep
+ accumulating until the gcd doesn't remove a significant factor. */
+ while (((nacc | kacc) & 1) == 0)
+ {
+ nacc >>= 1;
+ kacc >>= 1;
+ }
+#else
+ cnt = ((nacc | kacc) & 1) ^ 1;
+ nacc >>= cnt;
+ kacc >>= cnt;
+#endif
+ /* Accumulate next multiples. */
+ umul_ppmm (n1, n0, nacc, j);
+ umul_ppmm (k1, k0, kacc, i);
+ if (n1 != 0)
+ {
+ /* Accumulator overflow. Perform bignum step. */
+ MULDIV ();
+ nacc = j;
+ kacc = i;
+ }
+ else
+ {
+ if (k1 != 0) abort ();
+ /* Save new products in accumulators to keep accumulating. */
+ nacc = n0;
+ kacc = k0;
+ }
+ }
+
+ /* Take care of whatever is left in accumulators. */
+ MULDIV ();
+}
diff --git a/rts/gmp/mpz/cdiv_q.c b/rts/gmp/mpz/cdiv_q.c
new file mode 100644
index 0000000000..b15ba8aaa9
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_q.c
@@ -0,0 +1,51 @@
+/* mpz_cdiv_q -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_cdiv_q (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t dividend_size = dividend->_mp_size;
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t rem;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (rem, ABS (divisor_size));
+
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend_size) >= 0 && rem->_mp_size != 0)
+ mpz_add_ui (quot, quot, 1L);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/cdiv_q_ui.c b/rts/gmp/mpz/cdiv_q_ui.c
new file mode 100644
index 0000000000..74f3a90b83
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_q_ui.c
@@ -0,0 +1,67 @@
+/* mpz_cdiv_q_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_q_ui (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/cdiv_qr.c b/rts/gmp/mpz/cdiv_qr.c
new file mode 100644
index 0000000000..29c7c41a4e
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_qr.c
@@ -0,0 +1,64 @@
+/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_cdiv_qr (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t xsize;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the quotient and
+ remainder have been preliminary calculated. We have to copy it to
+ temporary space if it's the same variable as either QUOT or REM. */
+ if (quot == divisor || rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ xsize = dividend->_mp_size ^ divisor_size;;
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if (xsize >= 0 && rem->_mp_size != 0)
+ {
+ mpz_add_ui (quot, quot, 1L);
+ mpz_sub (rem, rem, divisor);
+ }
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/cdiv_qr_ui.c b/rts/gmp/mpz/cdiv_qr_ui.c
new file mode 100644
index 0000000000..a7873c6e20
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_qr_ui.c
@@ -0,0 +1,71 @@
+/* mpz_cdiv_qr_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_qr_ui (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = -(remainder_limb != 0);
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/cdiv_r.c b/rts/gmp/mpz/cdiv_r.c
new file mode 100644
index 0000000000..e96ce7e677
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_r.c
@@ -0,0 +1,59 @@
+/* mpz_cdiv_r -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_cdiv_r (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the remainder has been
+ preliminary calculated. We have to copy it to temporary space if it's
+ the same variable as REM. */
+ if (rem == divisor)
+ {
+
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ mpz_tdiv_r (rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend->_mp_size) >= 0 && rem->_mp_size != 0)
+ mpz_sub (rem, rem, divisor);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/cdiv_r_ui.c b/rts/gmp/mpz/cdiv_r_ui.c
new file mode 100644
index 0000000000..e17e2381c0
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_r_ui.c
@@ -0,0 +1,57 @@
+/* mpz_cdiv_r_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_r_ui (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ remainder_limb = divisor - remainder_limb;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = -(remainder_limb != 0);
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/cdiv_ui.c b/rts/gmp/mpz/cdiv_ui.c
new file mode 100644
index 0000000000..63547a78c0
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_ui.c
@@ -0,0 +1,50 @@
+/* mpz_cdiv_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_ui (dividend, divisor)
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ remainder_limb = divisor - remainder_limb;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/clear.c b/rts/gmp/mpz/clear.c
new file mode 100644
index 0000000000..5224553f9e
--- /dev/null
+++ b/rts/gmp/mpz/clear.c
@@ -0,0 +1,35 @@
+/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of
+ an integer.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_clear (mpz_ptr m)
+#else
+mpz_clear (m)
+ mpz_ptr m;
+#endif
+{
+ (*_mp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);
+}
diff --git a/rts/gmp/mpz/clrbit.c b/rts/gmp/mpz/clrbit.c
new file mode 100644
index 0000000000..865d84902f
--- /dev/null
+++ b/rts/gmp/mpz/clrbit.c
@@ -0,0 +1,114 @@
+/* mpz_clrbit -- clear a specified bit.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_clrbit (mpz_ptr d, unsigned long int bit_index)
+#else
+mpz_clrbit (d, bit_index)
+ mpz_ptr d;
+ unsigned long int bit_index;
+#endif
+{
+ mp_size_t dsize = d->_mp_size;
+ mp_ptr dp = d->_mp_d;
+ mp_size_t limb_index;
+
+ limb_index = bit_index / BITS_PER_MP_LIMB;
+ if (dsize >= 0)
+ {
+ if (limb_index < dsize)
+ {
+ dp[limb_index] &= ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB));
+ MPN_NORMALIZE (dp, dsize);
+ d->_mp_size = dsize;
+ }
+ else
+ ;
+ }
+ else
+ {
+ mp_size_t zero_bound;
+
+ /* Simulate two's complement arithmetic, i.e. simulate
+ 1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+ 2. clear the bit.
+ 3. Set OP = ~OP + 1. */
+
+ dsize = -dsize;
+
+ /* No upper bound on this loop, we're sure there's a non-zero limb
+ sooner ot later. */
+ for (zero_bound = 0; ; zero_bound++)
+ if (dp[zero_bound] != 0)
+ break;
+
+ if (limb_index > zero_bound)
+ {
+ if (limb_index < dsize)
+ dp[limb_index] |= (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ else
+ {
+ /* Ugh. The bit should be cleared outside of the end of the
+ number. We have to increase the size of the number. */
+ if (d->_mp_alloc < limb_index + 1)
+ {
+ _mpz_realloc (d, limb_index + 1);
+ dp = d->_mp_d;
+ }
+ MPN_ZERO (dp + dsize, limb_index - dsize);
+ dp[limb_index] = (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ d->_mp_size = -(limb_index + 1);
+ }
+ }
+ else if (limb_index == zero_bound)
+ {
+ dp[limb_index] = ((dp[limb_index] - 1)
+ | ((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB))) + 1;
+ if (dp[limb_index] == 0)
+ {
+ mp_size_t i;
+ for (i = limb_index + 1; i < dsize; i++)
+ {
+ dp[i] += 1;
+ if (dp[i] != 0)
+ goto fin;
+ }
+ /* We got carry all way out beyond the end of D. Increase
+ its size (and allocation if necessary). */
+ dsize++;
+ if (d->_mp_alloc < dsize)
+ {
+ _mpz_realloc (d, dsize);
+ dp = d->_mp_d;
+ }
+ dp[i] = 1;
+ d->_mp_size = -dsize;
+ fin:;
+ }
+ }
+ else
+ ;
+ }
+}
diff --git a/rts/gmp/mpz/cmp.c b/rts/gmp/mpz/cmp.c
new file mode 100644
index 0000000000..60628348e5
--- /dev/null
+++ b/rts/gmp/mpz/cmp.c
@@ -0,0 +1,75 @@
+/* mpz_cmp(u,v) -- Compare U, V. Return postive, zero, or negative
+ based on if U > V, U == V, or U < V.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef BERKELEY_MP
+int
+#if __STDC__
+mpz_cmp (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_cmp (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+int
+#if __STDC__
+mcmp (mpz_srcptr u, mpz_srcptr v)
+#else
+mcmp (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize = v->_mp_size;
+ mp_size_t size;
+ mp_srcptr up, vp;
+ int cmp;
+
+ if (usize != vsize)
+ return usize - vsize;
+
+ if (usize == 0)
+ return 0;
+
+ size = ABS (usize);
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ cmp = mpn_cmp (up, vp, size);
+
+ if (cmp == 0)
+ return 0;
+
+ if ((cmp < 0) == (usize < 0))
+ return 1;
+ else
+ return -1;
+}
diff --git a/rts/gmp/mpz/cmp_si.c b/rts/gmp/mpz/cmp_si.c
new file mode 100644
index 0000000000..0c2212fbe9
--- /dev/null
+++ b/rts/gmp/mpz/cmp_si.c
@@ -0,0 +1,64 @@
+/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.
+ Return positive, zero, or negative based on if U > V, U == V, or U < V.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+_mpz_cmp_si (mpz_srcptr u, signed long int v_digit)
+#else
+_mpz_cmp_si (u, v_digit)
+ mpz_srcptr u;
+ signed long int v_digit;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize;
+ mp_limb_t u_digit;
+
+ vsize = 0;
+ if (v_digit > 0)
+ vsize = 1;
+ else if (v_digit < 0)
+ {
+ vsize = -1;
+ v_digit = -v_digit;
+ }
+
+ if (usize != vsize)
+ return usize - vsize;
+
+ if (usize == 0)
+ return 0;
+
+ u_digit = u->_mp_d[0];
+
+ if (u_digit == (mp_limb_t) (unsigned long) v_digit)
+ return 0;
+
+ if (u_digit > (mp_limb_t) (unsigned long) v_digit)
+ return usize;
+ else
+ return -usize;
+}
diff --git a/rts/gmp/mpz/cmp_ui.c b/rts/gmp/mpz/cmp_ui.c
new file mode 100644
index 0000000000..fd84f301c1
--- /dev/null
+++ b/rts/gmp/mpz/cmp_ui.c
@@ -0,0 +1,53 @@
+/* mpz_cmp_ui.c -- Compare a mpz_t a with an mp_limb_t b. Return positive,
+ zero, or negative based on if a > b, a == b, or a < b.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+_mpz_cmp_ui (mpz_srcptr u, unsigned long int v_digit)
+#else
+_mpz_cmp_ui (u, v_digit)
+ mpz_srcptr u;
+ unsigned long int v_digit;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+
+ if (usize == 0)
+ return -(v_digit != 0);
+
+ if (usize == 1)
+ {
+ mp_limb_t u_digit;
+
+ u_digit = u->_mp_d[0];
+ if (u_digit > v_digit)
+ return 1;
+ if (u_digit < v_digit)
+ return -1;
+ return 0;
+ }
+
+ return (usize > 0) ? 1 : -1;
+}
diff --git a/rts/gmp/mpz/cmpabs.c b/rts/gmp/mpz/cmpabs.c
new file mode 100644
index 0000000000..037d7a9145
--- /dev/null
+++ b/rts/gmp/mpz/cmpabs.c
@@ -0,0 +1,57 @@
+/* mpz_cmpabs(u,v) -- Compare U, V. Return postive, zero, or negative
+ based on if U > V, U == V, or U < V.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_cmpabs (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_cmpabs (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize = v->_mp_size;
+ mp_size_t size;
+ mp_srcptr up, vp;
+ int cmp;
+
+ usize = ABS (usize);
+ vsize = ABS (vsize);
+
+ if (usize != vsize)
+ return usize - vsize;
+
+ if (usize == 0)
+ return 0;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ cmp = mpn_cmp (up, vp, usize);
+
+ return cmp;
+}
diff --git a/rts/gmp/mpz/cmpabs_ui.c b/rts/gmp/mpz/cmpabs_ui.c
new file mode 100644
index 0000000000..db816b5820
--- /dev/null
+++ b/rts/gmp/mpz/cmpabs_ui.c
@@ -0,0 +1,56 @@
+/* mpz_cmpabs_ui.c -- Compare a mpz_t a with an mp_limb_t b. Return positive,
+ zero, or negative based on if a > b, a == b, or a < b.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_cmpabs_ui (mpz_srcptr u, unsigned long int v_digit)
+#else
+mpz_cmpabs_ui (u, v_digit)
+ mpz_srcptr u;
+ unsigned long int v_digit;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+
+ if (usize == 0)
+ return -(v_digit != 0);
+
+ usize = ABS (usize);
+
+ if (usize == 1)
+ {
+ mp_limb_t u_digit;
+
+ u_digit = u->_mp_d[0];
+ if (u_digit > v_digit)
+ return 1;
+ if (u_digit < v_digit)
+ return -1;
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/rts/gmp/mpz/com.c b/rts/gmp/mpz/com.c
new file mode 100644
index 0000000000..18d6427779
--- /dev/null
+++ b/rts/gmp/mpz/com.c
@@ -0,0 +1,93 @@
+/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of
+ SRC to DST.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_com (mpz_ptr dst, mpz_srcptr src)
+#else
+mpz_com (dst, src)
+ mpz_ptr dst;
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size = src->_mp_size;
+ mp_srcptr src_ptr;
+ mp_ptr dst_ptr;
+
+ if (size >= 0)
+ {
+ /* As with infinite precision: one's complement, two's complement.
+ But this can be simplified using the identity -x = ~x + 1.
+ So we're going to compute (~~x) + 1 = x + 1! */
+
+ if (dst->_mp_alloc < size + 1)
+ _mpz_realloc (dst, size + 1);
+
+ src_ptr = src->_mp_d;
+ dst_ptr = dst->_mp_d;
+
+ if (size == 0)
+ {
+ /* Special case, as mpn_add wants the first arg's size >= the
+ second arg's size. */
+ dst_ptr[0] = 1;
+ dst->_mp_size = -1;
+ return;
+ }
+
+ {
+ mp_limb_t cy;
+
+ cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+ if (cy)
+ {
+ dst_ptr[size] = cy;
+ size++;
+ }
+ }
+
+ /* Store a negative size, to indicate ones-extension. */
+ dst->_mp_size = -size;
+ }
+ else
+ {
+ /* As with infinite precision: two's complement, then one's complement.
+ But that can be simplified using the identity -x = ~(x - 1).
+ So we're going to compute ~~(x - 1) = x - 1! */
+ size = -size;
+
+ if (dst->_mp_alloc < size)
+ _mpz_realloc (dst, size);
+
+ src_ptr = src->_mp_d;
+ dst_ptr = dst->_mp_d;
+
+ mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+ size -= dst_ptr[size - 1] == 0;
+
+ /* Store a positive size, to indicate zero-extension. */
+ dst->_mp_size = size;
+ }
+}
diff --git a/rts/gmp/mpz/divexact.c b/rts/gmp/mpz/divexact.c
new file mode 100644
index 0000000000..c2970454fd
--- /dev/null
+++ b/rts/gmp/mpz/divexact.c
@@ -0,0 +1,125 @@
+/* mpz_divexact -- finds quotient when known that quot * den == num && den != 0.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+ 301314194-2, and was done while I was a visiting reseacher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ References:
+ T. Jebelean, An algorithm for exact division, Journal of Symbolic
+ Computation, v. 15, 1993, pp. 169-180. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_divexact (quot, num, den)
+ mpz_ptr quot;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+{
+ mp_ptr qp, tp;
+ mp_size_t qsize, tsize;
+ mp_srcptr np, dp;
+ mp_size_t nsize, dsize;
+ TMP_DECL (marker);
+
+ nsize = ABS (num->_mp_size);
+ dsize = ABS (den->_mp_size);
+
+ qsize = nsize - dsize + 1;
+ if (quot->_mp_alloc < qsize)
+ _mpz_realloc (quot, qsize);
+
+ np = num->_mp_d;
+ dp = den->_mp_d;
+ qp = quot->_mp_d;
+
+ if (nsize == 0)
+ {
+ if (dsize == 0)
+ DIVIDE_BY_ZERO;
+ quot->_mp_size = 0;
+ return;
+ }
+
+ if (dsize <= 1)
+ {
+ if (dsize == 1)
+ {
+ mpn_divmod_1 (qp, np, nsize, dp[0]);
+ qsize -= qp[qsize - 1] == 0;
+ quot->_mp_size = (num->_mp_size ^ den->_mp_size) >= 0 ? qsize : -qsize;
+ return;
+ }
+
+ /* Generate divide-by-zero error since dsize == 0. */
+ DIVIDE_BY_ZERO;
+ }
+
+ TMP_MARK (marker);
+
+ /* QUOT <-- NUM/2^r, T <-- DEN/2^r where = r number of twos in DEN. */
+ while (dp[0] == 0)
+ np += 1, nsize -= 1, dp += 1, dsize -= 1;
+ tsize = MIN (qsize, dsize);
+ if ((dp[0] & 1) != 0)
+ {
+ if (quot == den) /* QUOT and DEN overlap. */
+ {
+ tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, tsize);
+ }
+ else
+ tp = (mp_ptr) dp;
+ if (qp != np)
+ MPN_COPY_INCR (qp, np, qsize);
+ }
+ else
+ {
+ unsigned int r;
+ tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+ count_trailing_zeros (r, dp[0]);
+ mpn_rshift (tp, dp, tsize, r);
+ if (dsize > tsize)
+ tp[tsize - 1] |= dp[tsize] << (BITS_PER_MP_LIMB - r);
+ mpn_rshift (qp, np, qsize, r);
+ if (nsize > qsize)
+ qp[qsize - 1] |= np[qsize] << (BITS_PER_MP_LIMB - r);
+ }
+
+ /* Now QUOT <-- QUOT/T. */
+ mpn_bdivmod (qp, qp, qsize, tp, tsize, qsize * BITS_PER_MP_LIMB);
+ MPN_NORMALIZE (qp, qsize);
+
+ quot->_mp_size = (num->_mp_size ^ den->_mp_size) >= 0 ? qsize : -qsize;
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/dump.c b/rts/gmp/mpz/dump.c
new file mode 100644
index 0000000000..dc318ac8cf
--- /dev/null
+++ b/rts/gmp/mpz/dump.c
@@ -0,0 +1,44 @@
+/* mpz_dump - Dump an integer to stdout.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS NOT SAFE TO
+ CALL THIS FUNCTION DIRECTLY. IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+ FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_dump (mpz_srcptr u)
+#else
+mpz_dump (u)
+ mpz_srcptr u;
+#endif
+{
+ char *str;
+
+ str = mpz_get_str (0, 10, u);
+ printf ("%s\n", str);
+ (*_mp_free_func) (str, 0);/* ??? broken alloc interface, pass what size ??? */
+}
diff --git a/rts/gmp/mpz/fac_ui.c b/rts/gmp/mpz/fac_ui.c
new file mode 100644
index 0000000000..85f40f271c
--- /dev/null
+++ b/rts/gmp/mpz/fac_ui.c
@@ -0,0 +1,157 @@
+/* mpz_fac_ui(result, n) -- Set RESULT to N!.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifdef DBG
+#include <stdio.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_fac_ui (mpz_ptr result, unsigned long int n)
+#else
+mpz_fac_ui (result, n)
+ mpz_ptr result;
+ unsigned long int n;
+#endif
+{
+#if SIMPLE_FAC
+
+ /* Be silly. Just multiply the numbers in ascending order. O(n**2). */
+
+ unsigned long int k;
+
+ mpz_set_ui (result, 1L);
+
+ for (k = 2; k <= n; k++)
+ mpz_mul_ui (result, result, k);
+#else
+
+ /* Be smarter. Multiply groups of numbers in ascending order until the
+ product doesn't fit in a limb. Multiply these partial product in a
+ balanced binary tree fashion, to make the operand have as equal sizes
+ as possible. When the operands have about the same size, mpn_mul
+ becomes faster. */
+
+ unsigned long int p, k;
+ mp_limb_t p1, p0;
+
+ /* Stack of partial products, used to make the computation balanced
+ (i.e. make the sizes of the multiplication operands equal). The
+ topmost position of MP_STACK will contain a one-limb partial product,
+ the second topmost will contain a two-limb partial product, and so
+ on. MP_STACK[0] will contain a partial product with 2**t limbs.
+ To compute n! MP_STACK needs to be less than
+ log(n)**2/log(BITS_PER_MP_LIMB), so 30 is surely enough. */
+#define MP_STACK_SIZE 30
+ mpz_t mp_stack[MP_STACK_SIZE];
+
+ /* TOP is an index into MP_STACK, giving the topmost element.
+ TOP_LIMIT_SO_FAR is the largets value it has taken so far. */
+ int top, top_limit_so_far;
+
+ /* Count of the total number of limbs put on MP_STACK so far. This
+ variable plays an essential role in making the compututation balanced.
+ See below. */
+ unsigned int tree_cnt;
+
+ top = top_limit_so_far = -1;
+ tree_cnt = 0;
+ p = 1;
+ for (k = 2; k <= n; k++)
+ {
+ /* Multiply the partial product in P with K. */
+ umul_ppmm (p1, p0, (mp_limb_t) p, (mp_limb_t) k);
+
+ /* Did we get overflow into the high limb, i.e. is the partial
+ product now more than one limb? */
+ if (p1 != 0)
+ {
+ tree_cnt++;
+
+ if (tree_cnt % 2 == 0)
+ {
+ mp_size_t i;
+
+ /* TREE_CNT is even (i.e. we have generated an even number of
+ one-limb partial products), which means that we have a
+ single-limb product on the top of MP_STACK. */
+
+ mpz_mul_ui (mp_stack[top], mp_stack[top], p);
+
+ /* If TREE_CNT is divisable by 4, 8,..., we have two
+ similar-sized partial products with 2, 4,... limbs at
+ the topmost two positions of MP_STACK. Multiply them
+ to form a new partial product with 4, 8,... limbs. */
+ for (i = 4; (tree_cnt & (i - 1)) == 0; i <<= 1)
+ {
+ mpz_mul (mp_stack[top - 1],
+ mp_stack[top], mp_stack[top - 1]);
+ top--;
+ }
+ }
+ else
+ {
+ /* Put the single-limb partial product in P on the stack.
+ (The next time we get a single-limb product, we will
+ multiply the two together.) */
+ top++;
+ if (top > top_limit_so_far)
+ {
+ if (top > MP_STACK_SIZE)
+ abort();
+ /* The stack is now bigger than ever, initialize the top
+ element. */
+ mpz_init_set_ui (mp_stack[top], p);
+ top_limit_so_far++;
+ }
+ else
+ mpz_set_ui (mp_stack[top], p);
+ }
+
+ /* We ignored the last result from umul_ppmm. Put K in P as the
+ first component of the next single-limb partial product. */
+ p = k;
+ }
+ else
+ /* We didn't get overflow in umul_ppmm. Put p0 in P and try
+ with one more value of K. */
+ p = p0; /* bogus if long != mp_limb_t */
+ }
+
+ /* We have partial products in mp_stack[0..top], in descending order.
+ We also have a small partial product in p.
+ Their product is the final result. */
+ if (top < 0)
+ mpz_set_ui (result, p);
+ else
+ mpz_mul_ui (result, mp_stack[top--], p);
+ while (top >= 0)
+ mpz_mul (result, result, mp_stack[top--]);
+
+ /* Free the storage allocated for MP_STACK. */
+ for (top = top_limit_so_far; top >= 0; top--)
+ mpz_clear (mp_stack[top]);
+#endif
+}
diff --git a/rts/gmp/mpz/fdiv_q.c b/rts/gmp/mpz/fdiv_q.c
new file mode 100644
index 0000000000..9d75ca33d2
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_q.c
@@ -0,0 +1,51 @@
+/* mpz_fdiv_q -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_fdiv_q (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t dividend_size = dividend->_mp_size;
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t rem;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (rem, ABS (divisor_size));
+
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend_size) < 0 && rem->_mp_size != 0)
+ mpz_sub_ui (quot, quot, 1L);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/fdiv_q_2exp.c b/rts/gmp/mpz/fdiv_q_2exp.c
new file mode 100644
index 0000000000..8e02180ecc
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_q_2exp.c
@@ -0,0 +1,104 @@
+/* mpz_fdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient
+ towards -infinity.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_q_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
+#else
+mpz_fdiv_q_2exp (w, u, cnt)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t wsize;
+ mp_size_t abs_usize = ABS (usize);
+ mp_size_t limb_cnt;
+ mp_ptr wp;
+ mp_limb_t round = 0;
+
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+ wsize = abs_usize - limb_cnt;
+ if (wsize <= 0)
+ {
+ wp = w->_mp_d;
+ wsize = 0;
+ /* Set ROUND since we know we skip some non-zero words in this case.
+ Well, if U is zero, we don't, but then this will be taken care of
+ below, since rounding only really takes place for negative U. */
+ round = 1;
+ wp[0] = 1;
+ w->_mp_size = -(usize < 0);
+ return;
+ }
+ else
+ {
+ mp_size_t i;
+ mp_ptr up;
+
+ /* Make sure there is enough space. We make an extra limb
+ here to account for possible rounding at the end. */
+ if (w->_mp_alloc < wsize + 1)
+ _mpz_realloc (w, wsize + 1);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ /* Set ROUND if we are about skip some non-zero limbs. */
+ for (i = 0; i < limb_cnt && round == 0; i++)
+ round = up[i];
+
+ cnt %= BITS_PER_MP_LIMB;
+ if (cnt != 0)
+ {
+ round |= mpn_rshift (wp, up + limb_cnt, wsize, cnt);
+ wsize -= wp[wsize - 1] == 0;
+ }
+ else
+ {
+ MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+ }
+ }
+
+ if (usize < 0 && round != 0)
+ {
+ mp_limb_t cy;
+ if (wsize != 0)
+ {
+ cy = mpn_add_1 (wp, wp, wsize, (mp_limb_t) 1);
+ wp[wsize] = cy;
+ wsize += cy;
+ }
+ else
+ {
+ /* We shifted something negative to zero. The result is -1. */
+ wp[0] = 1;
+ wsize = 1;
+ }
+ }
+ w->_mp_size = usize >= 0 ? wsize : -wsize;
+}
diff --git a/rts/gmp/mpz/fdiv_q_ui.c b/rts/gmp/mpz/fdiv_q_ui.c
new file mode 100644
index 0000000000..55d2498693
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_q_ui.c
@@ -0,0 +1,65 @@
+/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_q_ui (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fdiv_qr.c b/rts/gmp/mpz/fdiv_qr.c
new file mode 100644
index 0000000000..06ce50607b
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_qr.c
@@ -0,0 +1,64 @@
+/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_fdiv_qr (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t xsize;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the quotient and
+ remainder have been preliminary calculated. We have to copy it to
+ temporary space if it's the same variable as either QUOT or REM. */
+ if (quot == divisor || rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ xsize = dividend->_mp_size ^ divisor_size;;
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if (xsize < 0 && rem->_mp_size != 0)
+ {
+ mpz_sub_ui (quot, quot, 1L);
+ mpz_add (rem, rem, divisor);
+ }
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/fdiv_qr_ui.c b/rts/gmp/mpz/fdiv_qr_ui.c
new file mode 100644
index 0000000000..600c0dacfc
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_qr_ui.c
@@ -0,0 +1,69 @@
+/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_qr_ui (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = remainder_limb != 0;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fdiv_r.c b/rts/gmp/mpz/fdiv_r.c
new file mode 100644
index 0000000000..a3652838d2
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_r.c
@@ -0,0 +1,58 @@
+/* mpz_fdiv_r -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_fdiv_r (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the remainder has been
+ preliminary calculated. We have to copy it to temporary space if it's
+ the same variable as REM. */
+ if (rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ mpz_tdiv_r (rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend->_mp_size) < 0 && rem->_mp_size != 0)
+ mpz_add (rem, rem, divisor);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/fdiv_r_2exp.c b/rts/gmp/mpz/fdiv_r_2exp.c
new file mode 100644
index 0000000000..081ce19203
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_r_2exp.c
@@ -0,0 +1,156 @@
+/* mpz_fdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_r_2exp (mpz_ptr res, mpz_srcptr in, unsigned long int cnt)
+#else
+mpz_fdiv_r_2exp (res, in, cnt)
+ mpz_ptr res;
+ mpz_srcptr in;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t in_size = ABS (in->_mp_size);
+ mp_size_t res_size;
+ mp_size_t limb_cnt = cnt / BITS_PER_MP_LIMB;
+ mp_srcptr in_ptr = in->_mp_d;
+
+ if (in_size > limb_cnt)
+ {
+ /* The input operand is (probably) greater than 2**CNT. */
+ mp_limb_t x;
+
+ x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % BITS_PER_MP_LIMB) - 1);
+ if (x != 0)
+ {
+ res_size = limb_cnt + 1;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ res->_mp_d[limb_cnt] = x;
+ }
+ else
+ {
+ res_size = limb_cnt;
+ MPN_NORMALIZE (in_ptr, res_size);
+
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+ }
+ else
+ {
+ /* The input operand is smaller than 2**CNT. We perform a no-op,
+ apart from that we might need to copy IN to RES, and may need
+ to round the result. */
+ res_size = in_size;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+
+ if (res != in)
+ MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);
+ in_size = in->_mp_size;
+ res->_mp_size = res_size;
+ if (in_size < 0 && res_size != 0)
+ {
+ /* Result should be 2^CNT - RES */
+ mpz_t tmp;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ MPZ_TMP_INIT (tmp, cnt/BITS_PER_MP_LIMB + 2);
+ mpz_set_ui (tmp, 1L);
+ mpz_mul_2exp (tmp, tmp, cnt);
+ mpz_sub (res, tmp, res);
+ TMP_FREE (marker);
+ }
+}
+
+/* This is an alternative ending of the above function using just low-level
+ functions. Tested, but perhaps excessive? */
+#if 0
+ if (in->_mp_size < 0 && res_size != 0)
+ {
+ /* Result should be 2^CNT - RES */
+
+ mp_ptr rp;
+
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+
+ if (res->_mp_alloc <= limb_cnt)
+ _mpz_realloc (res, limb_cnt + 1);
+ rp = PTR(res);
+ if (res_size > limb_cnt)
+ {
+ mpn_nz_neg (rp, rp, res_size);
+ rp[limb_cnt] &= ~(~(mp_limb_t) 0 << cnt % BITS_PER_MP_LIMB);
+ MPN_NORMALIZE_NOT_ZERO (rp, res_size);
+ }
+ else
+ {
+ mp_size_t i;
+ mpn_nz_neg (rp, rp, res_size);
+ for (i = res_size; i < limb_cnt; i++)
+ rp[i] = ~ (mp_limb_t) 0;
+ res_size = limb_cnt;
+ if (cnt % BITS_PER_MP_LIMB != 0)
+ {
+ rp[res_size] = ((mp_limb_t) 1 << (cnt % BITS_PER_MP_LIMB)) - 1;
+ res_size++;
+ }
+ else
+ MPN_NORMALIZE_NOT_ZERO (rp, res_size);
+ }
+ }
+ SIZ(res) = res_size;
+}
+
+static void
+mpn_nz_neg (rp, sp, n)
+ mp_ptr rp, sp;
+ mp_size_t n;
+{
+ mp_size_t i;
+ mp_limb_t x;
+
+ x = sp[0];
+ rp[0] = -x;
+ for (i = 1; x == 0; i++)
+ {
+ x = sp[i];
+ rp[i] = -x;
+ }
+
+ for (; i < n; i++)
+ {
+ rp[i] = ~sp[i];
+ }
+}
+#endif
diff --git a/rts/gmp/mpz/fdiv_r_ui.c b/rts/gmp/mpz/fdiv_r_ui.c
new file mode 100644
index 0000000000..dd5c743d27
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_r_ui.c
@@ -0,0 +1,55 @@
+/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_r_ui (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ remainder_limb = divisor - remainder_limb;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = remainder_limb != 0;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fdiv_ui.c b/rts/gmp/mpz/fdiv_ui.c
new file mode 100644
index 0000000000..f937b5f6d0
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_ui.c
@@ -0,0 +1,48 @@
+/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_ui (dividend, divisor)
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ remainder_limb = divisor - remainder_limb;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fib_ui.c b/rts/gmp/mpz/fib_ui.c
new file mode 100644
index 0000000000..4bebb80d94
--- /dev/null
+++ b/rts/gmp/mpz/fib_ui.c
@@ -0,0 +1,165 @@
+/* mpz_fib_ui(result, n) -- Set RESULT to the Nth Fibonacci number.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* This is fast, but could be made somewhat faster and neater.
+ The timing is somewhat fluctuating for even/odd sizes because
+ of the extra hair used to save variables and operations. Here
+ are a few things one might want to address:
+ 1. Avoid using 4 intermediate variables in mpz_fib_bigcase.
+ 2. Call mpn functions directly. Straightforward for these functions.
+ 3. Merge the three functions into one.
+
+Said by Kevin:
+ Consider using the Lucas numbers L[n] as an auxiliary sequence, making
+ it possible to do the "doubling" operation in mpz_fib_bigcase with two
+ squares rather than two multiplies. The formulas are a little more
+ complicated, something like the following (untested).
+
+ F[2n] = ((F[n]+L[n])^2 - 6*F[n]^2 - 4*(-1)^n) / 2
+ L[2n] = 5*F[n]^2 + 2*(-1)^n
+
+ F[2n+1] = (F[2n] + L[2n]) / 2
+ L[2n+1] = (5*F[2n] + L[2n]) / 2
+
+ The Lucas number that comes for free here could even be returned.
+
+ Maybe there's formulas with two squares using just F[n], but I don't
+ know of any.
+*/
+
+/* Determine the needed storage for Fib(n). */
+#define FIB_SIZE(n) (((mp_size_t) ((n)*0.695)) / BITS_PER_MP_LIMB + 2)
+
+static void mpz_fib_bigcase _PROTO ((mpz_t, mpz_t, unsigned long int));
+static void mpz_fib_basecase _PROTO ((mpz_t, mpz_t, unsigned long int));
+
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 60
+#endif
+
+void
+#if __STDC__
+mpz_fib_ui (mpz_t r, unsigned long int n)
+#else
+mpz_fib_ui (r, n)
+ mpz_t r;
+ unsigned long int n;
+#endif
+{
+ if (n == 0)
+ mpz_set_ui (r, 0);
+ else
+ {
+ mpz_t t1;
+ mpz_init (t1);
+ if (n < FIB_THRESHOLD)
+ mpz_fib_basecase (t1, r, n);
+ else
+ mpz_fib_bigcase (t1, r, n);
+ mpz_clear (t1);
+ }
+}
+
+static void
+#if __STDC__
+mpz_fib_basecase (mpz_t t1, mpz_t t2, unsigned long int n)
+#else
+mpz_fib_basecase (t1, t2, n)
+ mpz_t t1;
+ mpz_t t2;
+ unsigned long int n;
+#endif
+{
+ unsigned long int m, i;
+
+ mpz_set_ui (t1, 0);
+ mpz_set_ui (t2, 1);
+ m = n/2;
+ for (i = 0; i < m; i++)
+ {
+ mpz_add (t1, t1, t2);
+ mpz_add (t2, t1, t2);
+ }
+ if ((n & 1) == 0)
+ {
+ mpz_sub (t1, t2, t1);
+ mpz_sub (t2, t2, t1); /* trick: recover t1 value just overwritten */
+ }
+}
+
+static void
+#if __STDC__
+mpz_fib_bigcase (mpz_t t1, mpz_t t2, unsigned long int n)
+#else
+mpz_fib_bigcase (t1, t2, n)
+ mpz_t t1;
+ mpz_t t2;
+ unsigned long int n;
+#endif
+{
+ unsigned long int n2;
+ int ni, i;
+ mpz_t x1, x2, u1, u2;
+
+ ni = 0;
+ for (n2 = n; n2 >= FIB_THRESHOLD; n2 /= 2)
+ ni++;
+
+ mpz_fib_basecase (t1, t2, n2);
+
+ mpz_init (x1);
+ mpz_init (x2);
+ mpz_init (u1);
+ mpz_init (u2);
+
+ for (i = ni - 1; i >= 0; i--)
+ {
+ mpz_mul_2exp (x1, t1, 1);
+ mpz_mul_2exp (x2, t2, 1);
+
+ mpz_add (x1, x1, t2);
+ mpz_sub (x2, x2, t1);
+
+ mpz_mul (u1, t2, x1);
+ mpz_mul (u2, t1, x2);
+
+ if (((n >> i) & 1) == 0)
+ {
+ mpz_sub (t1, u1, u2);
+ mpz_set (t2, u1);
+ }
+ else
+ {
+ mpz_set (t1, u1);
+ mpz_mul_2exp (t2, u1, 1);
+ mpz_sub (t2, t2, u2);
+ }
+ }
+
+ mpz_clear (x1);
+ mpz_clear (x2);
+ mpz_clear (u1);
+ mpz_clear (u2);
+}
diff --git a/rts/gmp/mpz/fits_sint_p.c b/rts/gmp/mpz/fits_sint_p.c
new file mode 100644
index 0000000000..82e32a24d5
--- /dev/null
+++ b/rts/gmp/mpz/fits_sint_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_sint_p (mpz_srcptr src)
+#else
+mpz_fits_sint_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size > 0)
+ {
+ if (size > 1)
+ return 0;
+ return mpl < ~((~(unsigned int) 0) >> 1);
+ }
+ else
+ {
+ if (size < -1)
+ return 0;
+ return mpl <= ~((~(unsigned int) 0) >> 1);
+ }
+}
diff --git a/rts/gmp/mpz/fits_slong_p.c b/rts/gmp/mpz/fits_slong_p.c
new file mode 100644
index 0000000000..e0669b5aaa
--- /dev/null
+++ b/rts/gmp/mpz/fits_slong_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_slong_p (mpz_srcptr src)
+#else
+mpz_fits_slong_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size > 0)
+ {
+ if (size > 1)
+ return 0;
+ return mpl < ~((~(unsigned long int) 0) >> 1);
+ }
+ else
+ {
+ if (size < -1)
+ return 0;
+ return mpl <= ~((~(unsigned long int) 0) >> 1);
+ }
+}
diff --git a/rts/gmp/mpz/fits_sshort_p.c b/rts/gmp/mpz/fits_sshort_p.c
new file mode 100644
index 0000000000..5b8e31afae
--- /dev/null
+++ b/rts/gmp/mpz/fits_sshort_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_sshort_p (mpz_srcptr src)
+#else
+mpz_fits_sshort_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size > 0)
+ {
+ if (size > 1)
+ return 0;
+ return mpl <= (((unsigned short int) ~(unsigned int) 0) >> 1);
+ }
+ else
+ {
+ if (size < -1)
+ return 0;
+ return mpl <= (((unsigned short int) ~(unsigned int) 0) >> 1) + 1;
+ }
+}
diff --git a/rts/gmp/mpz/fits_uint_p.c b/rts/gmp/mpz/fits_uint_p.c
new file mode 100644
index 0000000000..72f62fa723
--- /dev/null
+++ b/rts/gmp/mpz/fits_uint_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_uint_p (mpz_srcptr src)
+#else
+mpz_fits_uint_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size < 0 || size > 1)
+ return 0;
+ return mpl <= (~(unsigned int) 0);
+}
diff --git a/rts/gmp/mpz/fits_ulong_p.c b/rts/gmp/mpz/fits_ulong_p.c
new file mode 100644
index 0000000000..92eb42e86e
--- /dev/null
+++ b/rts/gmp/mpz/fits_ulong_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_ulong_p (mpz_srcptr src)
+#else
+mpz_fits_ulong_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size < 0 || size > 1)
+ return 0;
+ return mpl <= (~(unsigned long int) 0);
+}
diff --git a/rts/gmp/mpz/fits_ushort_p.c b/rts/gmp/mpz/fits_ushort_p.c
new file mode 100644
index 0000000000..bde0edae6e
--- /dev/null
+++ b/rts/gmp/mpz/fits_ushort_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_ushort_p (mpz_srcptr src)
+#else
+mpz_fits_ushort_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size < 0 || size > 1)
+ return 0;
+ return mpl <= ((unsigned short int) ~(unsigned int) 0);
+}
diff --git a/rts/gmp/mpz/gcd.c b/rts/gmp/mpz/gcd.c
new file mode 100644
index 0000000000..0d950dd609
--- /dev/null
+++ b/rts/gmp/mpz/gcd.c
@@ -0,0 +1,180 @@
+/* mpz/gcd.c: Calculate the greatest common divisor of two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_gcd (g, u, v)
+ mpz_ptr g;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
+#else
+gcd (u, v, g)
+ mpz_ptr g;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#endif /* BERKELEY_MP */
+
+{
+ unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
+ mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
+ mp_ptr tp;
+ mp_ptr up = u->_mp_d;
+ mp_size_t usize = ABS (u->_mp_size);
+ mp_ptr vp = v->_mp_d;
+ mp_size_t vsize = ABS (v->_mp_size);
+ mp_size_t gsize;
+ TMP_DECL (marker);
+
+ /* GCD(0, V) == V. */
+ if (usize == 0)
+ {
+ g->_mp_size = vsize;
+ if (g == v)
+ return;
+ if (g->_mp_alloc < vsize)
+ _mpz_realloc (g, vsize);
+ MPN_COPY (g->_mp_d, vp, vsize);
+ return;
+ }
+
+ /* GCD(U, 0) == U. */
+ if (vsize == 0)
+ {
+ g->_mp_size = usize;
+ if (g == u)
+ return;
+ if (g->_mp_alloc < usize)
+ _mpz_realloc (g, usize);
+ MPN_COPY (g->_mp_d, up, usize);
+ return;
+ }
+
+ if (usize == 1)
+ {
+ g->_mp_size = 1;
+ g->_mp_d[0] = mpn_gcd_1 (vp, vsize, up[0]);
+ return;
+ }
+
+ if (vsize == 1)
+ {
+ g->_mp_size = 1;
+ g->_mp_d[0] = mpn_gcd_1 (up, usize, vp[0]);
+ return;
+ }
+
+ TMP_MARK (marker);
+
+ /* Eliminate low zero bits from U and V and move to temporary storage. */
+ while (*up == 0)
+ up++;
+ u_zero_limbs = up - u->_mp_d;
+ usize -= u_zero_limbs;
+ count_trailing_zeros (u_zero_bits, *up);
+ tp = up;
+ up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB);
+ if (u_zero_bits != 0)
+ {
+ mpn_rshift (up, tp, usize, u_zero_bits);
+ usize -= up[usize - 1] == 0;
+ }
+ else
+ MPN_COPY (up, tp, usize);
+
+ while (*vp == 0)
+ vp++;
+ v_zero_limbs = vp - v->_mp_d;
+ vsize -= v_zero_limbs;
+ count_trailing_zeros (v_zero_bits, *vp);
+ tp = vp;
+ vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB);
+ if (v_zero_bits != 0)
+ {
+ mpn_rshift (vp, tp, vsize, v_zero_bits);
+ vsize -= vp[vsize - 1] == 0;
+ }
+ else
+ MPN_COPY (vp, tp, vsize);
+
+ if (u_zero_limbs > v_zero_limbs)
+ {
+ g_zero_limbs = v_zero_limbs;
+ g_zero_bits = v_zero_bits;
+ }
+ else if (u_zero_limbs < v_zero_limbs)
+ {
+ g_zero_limbs = u_zero_limbs;
+ g_zero_bits = u_zero_bits;
+ }
+ else /* Equal. */
+ {
+ g_zero_limbs = u_zero_limbs;
+ g_zero_bits = MIN (u_zero_bits, v_zero_bits);
+ }
+
+ /* Call mpn_gcd. The 2nd argument must not have more bits than the 1st. */
+ vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
+ ? mpn_gcd (vp, vp, vsize, up, usize)
+ : mpn_gcd (vp, up, usize, vp, vsize);
+
+ /* Here G <-- V << (g_zero_limbs*BITS_PER_MP_LIMB + g_zero_bits). */
+ gsize = vsize + g_zero_limbs;
+ if (g_zero_bits != 0)
+ {
+ mp_limb_t cy_limb;
+ gsize += (vp[vsize - 1] >> (BITS_PER_MP_LIMB - g_zero_bits)) != 0;
+ if (g->_mp_alloc < gsize)
+ _mpz_realloc (g, gsize);
+ MPN_ZERO (g->_mp_d, g_zero_limbs);
+
+ tp = g->_mp_d + g_zero_limbs;
+ cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
+ if (cy_limb != 0)
+ tp[vsize] = cy_limb;
+ }
+ else
+ {
+ if (g->_mp_alloc < gsize)
+ _mpz_realloc (g, gsize);
+ MPN_ZERO (g->_mp_d, g_zero_limbs);
+ MPN_COPY (g->_mp_d + g_zero_limbs, vp, vsize);
+ }
+
+ g->_mp_size = gsize;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/gcd_ui.c b/rts/gmp/mpz/gcd_ui.c
new file mode 100644
index 0000000000..f3bec58829
--- /dev/null
+++ b/rts/gmp/mpz/gcd_ui.c
@@ -0,0 +1,65 @@
+/* mpz_gcd_ui -- Calculate the greatest common divisior of two integers.
+
+Copyright (C) 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_gcd_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_gcd_ui (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int v;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t res;
+
+ size = ABS (u->_mp_size);
+
+ if (size == 0)
+ res = v;
+ else if (v == 0)
+ {
+ if (w != NULL && u != w)
+ {
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ MPN_COPY (w->_mp_d, u->_mp_d, size);
+ }
+ w->_mp_size = size;
+ /* We can't return any useful result for gcd(big,0). */
+ return size > 1 ? 0 : w->_mp_d[0];
+ }
+ else
+ res = mpn_gcd_1 (u->_mp_d, size, (mp_limb_t) v);
+
+ if (w != NULL)
+ {
+ w->_mp_d[0] = res;
+ w->_mp_size = 1;
+ }
+ return res;
+}
diff --git a/rts/gmp/mpz/gcdext.c b/rts/gmp/mpz/gcdext.c
new file mode 100644
index 0000000000..3ba04c84ff
--- /dev/null
+++ b/rts/gmp/mpz/gcdext.c
@@ -0,0 +1,137 @@
+/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
+ g = as + bt.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
+#else
+mpz_gcdext (g, s, t, a, b)
+ mpz_ptr g;
+ mpz_ptr s;
+ mpz_ptr t;
+ mpz_srcptr a;
+ mpz_srcptr b;
+#endif
+{
+ mp_size_t asize, bsize, usize, vsize;
+ mp_srcptr ap, bp;
+ mp_ptr up, vp;
+ mp_size_t gsize, ssize, tmp_ssize;
+ mp_ptr gp, sp, tmp_gp, tmp_sp;
+ mpz_srcptr u, v;
+ mpz_ptr ss, tt;
+ __mpz_struct stmp, gtmp;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* mpn_gcdext requires that U >= V. Therefore, we often have to swap U and
+ V. This in turn leads to a lot of complications. The computed cofactor
+ will be the wrong one, so we have to fix that up at the end. */
+
+ asize = ABS (SIZ (a));
+ bsize = ABS (SIZ (b));
+ ap = PTR (a);
+ bp = PTR (b);
+ if (asize > bsize || (asize == bsize && mpn_cmp (ap, bp, asize) > 0))
+ {
+ usize = asize;
+ vsize = bsize;
+ up = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+ vp = (mp_ptr) TMP_ALLOC ((vsize + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (up, ap, usize);
+ MPN_COPY (vp, bp, vsize);
+ u = a;
+ v = b;
+ ss = s;
+ tt = t;
+ }
+ else
+ {
+ usize = bsize;
+ vsize = asize;
+ up = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+ vp = (mp_ptr) TMP_ALLOC ((vsize + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (up, bp, usize);
+ MPN_COPY (vp, ap, vsize);
+ u = b;
+ v = a;
+ ss = t;
+ tt = s;
+ }
+
+ tmp_gp = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+ tmp_sp = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+
+ if (vsize == 0)
+ {
+ tmp_sp[0] = 1;
+ tmp_ssize = 1;
+ MPN_COPY (tmp_gp, up, usize);
+ gsize = usize;
+ }
+ else
+ gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, up, usize, vp, vsize);
+ ssize = ABS (tmp_ssize);
+
+ PTR (&gtmp) = tmp_gp;
+ SIZ (&gtmp) = gsize;
+
+ PTR (&stmp) = tmp_sp;
+ SIZ (&stmp) = (tmp_ssize ^ SIZ (u)) >= 0 ? ssize : -ssize;
+
+ if (tt != NULL)
+ {
+ if (SIZ (v) == 0)
+ SIZ (tt) = 0;
+ else
+ {
+ mpz_t x;
+ MPZ_TMP_INIT (x, ssize + usize + 1);
+ mpz_mul (x, &stmp, u);
+ mpz_sub (x, &gtmp, x);
+ mpz_tdiv_q (tt, x, v);
+ }
+ }
+
+ if (ss != NULL)
+ {
+ if (ALLOC (ss) < ssize)
+ _mpz_realloc (ss, ssize);
+ sp = PTR (ss);
+ MPN_COPY (sp, tmp_sp, ssize);
+ SIZ (ss) = SIZ (&stmp);
+ }
+
+ if (ALLOC (g) < gsize)
+ _mpz_realloc (g, gsize);
+ gp = PTR (g);
+ MPN_COPY (gp, tmp_gp, gsize);
+ SIZ (g) = gsize;
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/get_d.c b/rts/gmp/mpz/get_d.c
new file mode 100644
index 0000000000..6a7c5856bb
--- /dev/null
+++ b/rts/gmp/mpz/get_d.c
@@ -0,0 +1,128 @@
+/* double mpz_get_d (mpz_t src) -- Return the double approximation to SRC.
+
+Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static int
+#if __STDC__
+mpn_zero_p (mp_ptr p, mp_size_t n)
+#else
+mpn_zero_p (p, n)
+ mp_ptr p;
+ mp_size_t n;
+#endif
+{
+ mp_size_t i;
+
+ for (i = 0; i < n; i++)
+ {
+ if (p[i] != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+
+double
+#if __STDC__
+mpz_get_d (mpz_srcptr src)
+#else
+mpz_get_d (src)
+ mpz_srcptr src;
+#endif
+{
+ double res;
+ mp_size_t size;
+ int negative;
+ mp_ptr qp;
+ mp_limb_t hz, lz;
+ int cnt;
+
+ size = SIZ(src);
+ if (size == 0)
+ return 0.0;
+
+ negative = size < 0;
+ size = ABS (size);
+ qp = PTR(src);
+
+ if (size == 1)
+ {
+ res = qp[size - 1];
+ }
+ else if (size == 2)
+ {
+ res = MP_BASE_AS_DOUBLE * qp[size - 1] + qp[size - 2];
+ }
+ else
+ {
+ count_leading_zeros (cnt, qp[size - 1]);
+
+#if BITS_PER_MP_LIMB == 32
+ if (cnt == 0)
+ {
+ hz = qp[size - 1];
+ lz = qp[size - 2];
+ }
+ else
+ {
+ hz = (qp[size - 1] << cnt) | (qp[size - 2] >> BITS_PER_MP_LIMB - cnt);
+ lz = (qp[size - 2] << cnt) | (qp[size - 3] >> BITS_PER_MP_LIMB - cnt);
+ }
+#if _GMP_IEEE_FLOATS
+ /* Take bits from less significant limbs, but only if they may affect
+ the result. */
+ if ((lz & 0x7ff) == 0x400)
+ {
+ if (cnt != 0)
+ lz += ((qp[size - 3] << cnt) != 0 || ! mpn_zero_p (qp, size - 3));
+ else
+ lz += (! mpn_zero_p (qp, size - 2));
+ }
+#endif
+ res = MP_BASE_AS_DOUBLE * hz + lz;
+ res = __gmp_scale2 (res, (size - 2) * BITS_PER_MP_LIMB - cnt);
+#endif
+#if BITS_PER_MP_LIMB == 64
+ if (cnt == 0)
+ hz = qp[size - 1];
+ else
+ hz = (qp[size - 1] << cnt) | (qp[size - 2] >> BITS_PER_MP_LIMB - cnt);
+#if _GMP_IEEE_FLOATS
+ if ((hz & 0x7ff) == 0x400)
+ {
+ if (cnt != 0)
+ hz += ((qp[size - 2] << cnt) != 0 || ! mpn_zero_p (qp, size - 2));
+ else
+ hz += (! mpn_zero_p (qp, size - 1));
+ }
+#endif
+ res = hz;
+ res = __gmp_scale2 (res, (size - 1) * BITS_PER_MP_LIMB - cnt);
+#endif
+ }
+
+ return negative ? -res : res;
+}
diff --git a/rts/gmp/mpz/get_si.c b/rts/gmp/mpz/get_si.c
new file mode 100644
index 0000000000..8a5d0e4803
--- /dev/null
+++ b/rts/gmp/mpz/get_si.c
@@ -0,0 +1,43 @@
+/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+signed long int
+#if __STDC__
+mpz_get_si (mpz_srcptr op)
+#else
+mpz_get_si (op)
+ mpz_srcptr op;
+#endif
+{
+ mp_size_t size = op->_mp_size;
+ mp_limb_t low_limb = op->_mp_d[0];
+
+ if (size > 0)
+ return low_limb % ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+ else if (size < 0)
+ /* This convoluted expression is necessary to properly handle 0x80000000 */
+ return ~((low_limb - 1) % ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)));
+ else
+ return 0;
+}
diff --git a/rts/gmp/mpz/get_str.c b/rts/gmp/mpz/get_str.c
new file mode 100644
index 0000000000..c7278afb52
--- /dev/null
+++ b/rts/gmp/mpz/get_str.c
@@ -0,0 +1,118 @@
+/* mpz_get_str (string, base, mp_src) -- Convert the multiple precision
+ number MP_SRC to a string STRING of base BASE. If STRING is NULL
+ allocate space for the result. In any case, return a pointer to the
+ result. If STRING is not NULL, the caller must ensure enough space is
+ available to store the result.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+char *
+#if __STDC__
+mpz_get_str (char *res_str, int base, mpz_srcptr x)
+#else
+mpz_get_str (res_str, base, x)
+ char *res_str;
+ int base;
+ mpz_srcptr x;
+#endif
+{
+ mp_ptr xp;
+ mp_size_t x_size = x->_mp_size;
+ unsigned char *str;
+ char *return_str;
+ size_t str_size;
+ char *num_to_text;
+ int i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ if (base >= 0)
+ {
+ if (base == 0)
+ base = 10;
+ num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+ }
+ else
+ {
+ base = -base;
+ num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ }
+
+ /* We allways allocate space for the string. If the caller passed a
+ NULL pointer for RES_STR, we allocate permanent space and return
+ a pointer to that to the caller. */
+ str_size = ((size_t) (ABS (x_size) * BITS_PER_MP_LIMB
+ * __mp_bases[base].chars_per_bit_exactly)) + 3;
+ if (res_str == 0)
+ {
+ /* We didn't get a string from the user. Allocate one (and return
+ a pointer to it). */
+ res_str = (char *) (*_mp_allocate_func) (str_size);
+ /* Make str, the variable used for raw result from mpn_get_str,
+ point to the same string, but just after a possible minus sign. */
+ str = (unsigned char *) res_str + 1;
+ }
+ else
+ {
+ /* Use TMP_ALLOC to get temporary space, since we need a few extra bytes
+ that we can't expect to caller to supply us with. */
+ str = (unsigned char *) TMP_ALLOC (str_size);
+ }
+
+ return_str = res_str;
+
+ if (x_size == 0)
+ {
+ res_str[0] = '0';
+ res_str[1] = 0;
+ TMP_FREE (marker);
+ return res_str;
+ }
+ if (x_size < 0)
+ {
+ *res_str++ = '-';
+ x_size = -x_size;
+ }
+
+ /* Move the number to convert into temporary space, since mpn_get_str
+ clobbers its argument + needs one extra high limb.... */
+ xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (xp, x->_mp_d, x_size);
+
+ str_size = mpn_get_str (str, base, xp, x_size);
+
+ /* mpn_get_str might make some leading zeros. Skip them. */
+ while (*str == 0)
+ {
+ str_size--;
+ str++;
+ }
+
+ /* Translate result to printable chars and move result to RES_STR. */
+ for (i = 0; i < str_size; i++)
+ res_str[i] = num_to_text[str[i]];
+ res_str[str_size] = 0;
+
+ TMP_FREE (marker);
+ return return_str;
+}
diff --git a/rts/gmp/mpz/get_ui.c b/rts/gmp/mpz/get_ui.c
new file mode 100644
index 0000000000..a8ec9e01a4
--- /dev/null
+++ b/rts/gmp/mpz/get_ui.c
@@ -0,0 +1,37 @@
+/* mpz_get_ui(integer) -- Return the least significant digit from INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_get_ui (mpz_srcptr integer)
+#else
+mpz_get_ui (integer)
+ mpz_srcptr integer;
+#endif
+{
+ if (integer->_mp_size == 0)
+ return 0;
+ else
+ return integer->_mp_d[0];
+}
diff --git a/rts/gmp/mpz/getlimbn.c b/rts/gmp/mpz/getlimbn.c
new file mode 100644
index 0000000000..b772ed05c4
--- /dev/null
+++ b/rts/gmp/mpz/getlimbn.c
@@ -0,0 +1,38 @@
+/* mpz_getlimbn(integer,n) -- Return the N:th limb from INTEGER.
+
+Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpz_getlimbn (mpz_srcptr integer, mp_size_t n)
+#else
+mpz_getlimbn (integer, n)
+ mpz_srcptr integer;
+ mp_size_t n;
+#endif
+{
+ if (ABS (integer->_mp_size) <= n || n < 0)
+ return 0;
+ else
+ return integer->_mp_d[n];
+}
diff --git a/rts/gmp/mpz/hamdist.c b/rts/gmp/mpz/hamdist.c
new file mode 100644
index 0000000000..b039a653d2
--- /dev/null
+++ b/rts/gmp/mpz/hamdist.c
@@ -0,0 +1,62 @@
+/* mpz_hamdist(mpz_ptr op1, mpz_ptr op2) -- Compute the hamming distance
+ between OP1 and OP2. If one of the operands is negative, return ~0. (We
+ could make the function well-defined when both operands are negative, but
+ that would probably not be worth the trouble.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_hamdist (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_hamdist (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+{
+ mp_srcptr up, vp;
+ mp_size_t usize, vsize, size;
+ unsigned long int count;
+
+ usize = u->_mp_size;
+ vsize = v->_mp_size;
+
+ if ((usize | vsize) < 0)
+ return ~ (unsigned long int) 0;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ if (usize > vsize)
+ {
+ count = mpn_popcount (up + vsize, usize - vsize);
+ size = vsize;
+ }
+ else
+ {
+ count = mpn_popcount (vp + usize, vsize - usize);
+ size = usize;
+ }
+
+ return count + mpn_hamdist (up, vp, size);
+}
diff --git a/rts/gmp/mpz/init.c b/rts/gmp/mpz/init.c
new file mode 100644
index 0000000000..2e8e4d2cbd
--- /dev/null
+++ b/rts/gmp/mpz/init.c
@@ -0,0 +1,36 @@
+/* mpz_init() -- Make a new multiple precision number with value 0.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init (mpz_ptr x)
+#else
+mpz_init (x)
+ mpz_ptr x;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ x->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/inp_raw.c b/rts/gmp/mpz/inp_raw.c
new file mode 100644
index 0000000000..15e601229d
--- /dev/null
+++ b/rts/gmp/mpz/inp_raw.c
@@ -0,0 +1,101 @@
+/* mpz_inp_raw -- Input a mpz_t in raw, but endianess, and wordsize
+ independent format (as output by mpz_out_raw).
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+#if __STDC__
+mpz_inp_raw (mpz_ptr x, FILE *stream)
+#else
+mpz_inp_raw (x, stream)
+ mpz_ptr x;
+ FILE *stream;
+#endif
+{
+ int i;
+ mp_size_t s;
+ mp_size_t xsize;
+ mp_ptr xp;
+ unsigned int c;
+ mp_limb_t x_limb;
+ mp_size_t in_bytesize;
+ int neg_flag;
+
+ if (stream == 0)
+ stream = stdin;
+
+ /* Read 4-byte size */
+ in_bytesize = 0;
+ for (i = 4 - 1; i >= 0; i--)
+ {
+ c = fgetc (stream);
+ in_bytesize = (in_bytesize << BITS_PER_CHAR) | c;
+ }
+
+ /* Size is stored as a 32 bit word; sign extend in_bytesize for non-32 bit
+ machines. */
+ if (sizeof (mp_size_t) > 4)
+ in_bytesize |= (-(in_bytesize < 0)) << 31;
+
+ neg_flag = in_bytesize < 0;
+ in_bytesize = ABS (in_bytesize);
+ xsize = (in_bytesize + BYTES_PER_MP_LIMB - 1) / BYTES_PER_MP_LIMB;
+
+ if (xsize == 0)
+ {
+ x->_mp_size = 0;
+ return 4; /* we've read 4 bytes */
+ }
+
+ if (x->_mp_alloc < xsize)
+ _mpz_realloc (x, xsize);
+ xp = x->_mp_d;
+
+ x_limb = 0;
+ for (i = (in_bytesize - 1) % BYTES_PER_MP_LIMB; i >= 0; i--)
+ {
+ c = fgetc (stream);
+ x_limb = (x_limb << BITS_PER_CHAR) | c;
+ }
+ xp[xsize - 1] = x_limb;
+
+ for (s = xsize - 2; s >= 0; s--)
+ {
+ x_limb = 0;
+ for (i = BYTES_PER_MP_LIMB - 1; i >= 0; i--)
+ {
+ c = fgetc (stream);
+ x_limb = (x_limb << BITS_PER_CHAR) | c;
+ }
+ xp[s] = x_limb;
+ }
+
+ if (c == EOF)
+ return 0; /* error */
+
+ MPN_NORMALIZE (xp, xsize);
+ x->_mp_size = neg_flag ? -xsize : xsize;
+ return in_bytesize + 4;
+}
diff --git a/rts/gmp/mpz/inp_str.c b/rts/gmp/mpz/inp_str.c
new file mode 100644
index 0000000000..7aa5e1fc30
--- /dev/null
+++ b/rts/gmp/mpz/inp_str.c
@@ -0,0 +1,167 @@
+/* mpz_inp_str(dest_integer, stream, base) -- Input a number in base
+ BASE from stdio stream STREAM and store the result in DEST_INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static int
+#if __STDC__
+digit_value_in_base (int c, int base)
+#else
+digit_value_in_base (c, base)
+ int c;
+ int base;
+#endif
+{
+ int digit;
+
+ if (isdigit (c))
+ digit = c - '0';
+ else if (islower (c))
+ digit = c - 'a' + 10;
+ else if (isupper (c))
+ digit = c - 'A' + 10;
+ else
+ return -1;
+
+ if (digit < base)
+ return digit;
+ return -1;
+}
+
+size_t
+#if __STDC__
+mpz_inp_str (mpz_ptr x, FILE *stream, int base)
+#else
+mpz_inp_str (x, stream, base)
+ mpz_ptr x;
+ FILE *stream;
+ int base;
+#endif
+{
+ char *str;
+ size_t alloc_size, str_size;
+ int c;
+ int negative;
+ mp_size_t xsize;
+ size_t nread;
+
+ if (stream == 0)
+ stream = stdin;
+
+ nread = 0;
+
+ /* Skip whitespace. */
+ do
+ {
+ c = getc (stream);
+ nread++;
+ }
+ while (isspace (c));
+
+ negative = 0;
+ if (c == '-')
+ {
+ negative = 1;
+ c = getc (stream);
+ nread++;
+ }
+
+ if (digit_value_in_base (c, base == 0 ? 10 : base) < 0)
+ return 0; /* error if no digits */
+
+ /* If BASE is 0, try to find out the base by looking at the initial
+ characters. */
+ if (base == 0)
+ {
+ base = 10;
+ if (c == '0')
+ {
+ base = 8;
+ c = getc (stream);
+ nread++;
+ if (c == 'x' || c == 'X')
+ {
+ base = 16;
+ c = getc (stream);
+ nread++;
+ }
+ else if (c == 'b' || c == 'B')
+ {
+ base = 2;
+ c = getc (stream);
+ nread++;
+ }
+ }
+ }
+
+ /* Skip leading zeros. */
+ while (c == '0')
+ {
+ c = getc (stream);
+ nread++;
+ }
+
+ alloc_size = 100;
+ str = (char *) (*_mp_allocate_func) (alloc_size);
+ str_size = 0;
+
+ for (;;)
+ {
+ int dig;
+ if (str_size >= alloc_size)
+ {
+ size_t old_alloc_size = alloc_size;
+ alloc_size = alloc_size * 3 / 2;
+ str = (char *) (*_mp_reallocate_func) (str, old_alloc_size, alloc_size);
+ }
+ dig = digit_value_in_base (c, base);
+ if (dig < 0)
+ break;
+ str[str_size++] = dig;
+ c = getc (stream);
+ }
+
+ ungetc (c, stream);
+
+ /* Make sure the string is not empty, mpn_set_str would fail. */
+ if (str_size == 0)
+ {
+ x->_mp_size = 0;
+ (*_mp_free_func) (str, alloc_size);
+ return nread;
+ }
+
+ xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly))
+ / BITS_PER_MP_LIMB + 2);
+ if (x->_mp_alloc < xsize)
+ _mpz_realloc (x, xsize);
+
+ /* Convert the byte array in base BASE to our bignum format. */
+ xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base);
+ x->_mp_size = negative ? -xsize : xsize;
+
+ (*_mp_free_func) (str, alloc_size);
+ return str_size + nread;
+}
diff --git a/rts/gmp/mpz/invert.c b/rts/gmp/mpz/invert.c
new file mode 100644
index 0000000000..749a0969fc
--- /dev/null
+++ b/rts/gmp/mpz/invert.c
@@ -0,0 +1,77 @@
+/* mpz_invert (inv, x, n). Find multiplicative inverse of X in Z(N).
+ If X has an inverse, return non-zero and store inverse in INVERSE,
+ otherwise, return 0 and put garbage in INVERSE.
+
+Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
+#else
+mpz_invert (inverse, x, n)
+ mpz_ptr inverse;
+ mpz_srcptr x, n;
+#endif
+{
+ mpz_t gcd, tmp;
+ mp_size_t xsize, nsize, size;
+ TMP_DECL (marker);
+
+ xsize = SIZ (x);
+ nsize = SIZ (n);
+ xsize = ABS (xsize);
+ nsize = ABS (nsize);
+ size = MAX (xsize, nsize) + 1;
+
+ /* No inverse exists if the leftside operand is 0. Likewise, no
+ inverse exists if the mod operand is 1. */
+ if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1))
+ return 0;
+
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (gcd, size);
+ MPZ_TMP_INIT (tmp, size);
+ mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n);
+
+ /* If no inverse existed, return with an indication of that. */
+ if (gcd->_mp_size != 1 || (gcd->_mp_d)[0] != 1)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+
+ /* Make sure we return a positive inverse. */
+ if (SIZ (tmp) < 0)
+ {
+ if (SIZ (n) < 0)
+ mpz_sub (inverse, tmp, n);
+ else
+ mpz_add (inverse, tmp, n);
+ }
+ else
+ mpz_set (inverse, tmp);
+
+ TMP_FREE (marker);
+ return 1;
+}
diff --git a/rts/gmp/mpz/ior.c b/rts/gmp/mpz/ior.c
new file mode 100644
index 0000000000..0bb5a806dc
--- /dev/null
+++ b/rts/gmp/mpz/ior.c
@@ -0,0 +1,244 @@
+/* mpz_ior -- Logical inclusive or.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+#else
+mpz_ior (res, op1, op2)
+ mpz_ptr res;
+ mpz_srcptr op1;
+ mpz_srcptr op2;
+#endif
+{
+ mp_srcptr op1_ptr, op2_ptr;
+ mp_size_t op1_size, op2_size;
+ mp_ptr res_ptr;
+ mp_size_t res_size;
+ mp_size_t i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op1_size = op1->_mp_size;
+ op2_size = op2->_mp_size;
+
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+
+ if (op1_size >= 0)
+ {
+ if (op2_size >= 0)
+ {
+ if (op1_size >= op2_size)
+ {
+ if (res->_mp_alloc < op1_size)
+ {
+ _mpz_realloc (res, op1_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op1_ptr)
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ if (res->_mp_alloc < op2_size)
+ {
+ _mpz_realloc (res, op2_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op2_ptr)
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ res->_mp_size = res_size;
+ return;
+ }
+ else /* op2_size < 0 */
+ {
+ /* Fall through to the code at the end of the function. */
+ }
+ }
+ else
+ {
+ if (op2_size < 0)
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+
+ /* Both operands are negative, so will be the result.
+ -((-OP1) | (-OP2)) = -(~(OP1 - 1) | ~(OP2 - 1)) =
+ = ~(~(OP1 - 1) | ~(OP2 - 1)) + 1 =
+ = ((OP1 - 1) & (OP2 - 1)) + 1 */
+
+ op1_size = -op1_size;
+ op2_size = -op2_size;
+
+ res_size = MIN (op1_size, op2_size);
+
+ /* Possible optimization: Decrease mpn_sub precision,
+ as we won't use the entire res of both. */
+ opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);
+ op1_ptr = opx;
+
+ opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to
+ temporary space--never to the space RES->_mp_d used
+ to point to before reallocation. */
+ }
+
+ /* First loop finds the size of the result. */
+ for (i = res_size - 1; i >= 0; i--)
+ if ((op1_ptr[i] & op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+
+ if (res_size != 0)
+ {
+ /* Second loop computes the real result. */
+ for (i = res_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+ }
+ else
+ {
+ res_ptr[0] = 1;
+ res_size = 1;
+ }
+
+ res->_mp_size = -res_size;
+ TMP_FREE (marker);
+ return;
+ }
+ else
+ {
+ /* We should compute -OP1 | OP2. Swap OP1 and OP2 and fall
+ through to the code that handles OP1 | -OP2. */
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ }
+ }
+
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+ mp_size_t res_alloc;
+ mp_size_t count;
+
+ /* Operand 2 negative, so will be the result.
+ -(OP1 | (-OP2)) = -(OP1 | ~(OP2 - 1)) =
+ = ~(OP1 | ~(OP2 - 1)) + 1 =
+ = (~OP1 & (OP2 - 1)) + 1 */
+
+ op2_size = -op2_size;
+
+ res_alloc = op2_size;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+ op2_size -= op2_ptr[op2_size - 1] == 0;
+
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ op1_ptr = op1->_mp_d;
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ if (op1_size >= op2_size)
+ {
+ /* We can just ignore the part of OP1 that stretches above OP2,
+ because the result limbs are zero there. */
+
+ /* First loop finds the size of the result. */
+ for (i = op2_size - 1; i >= 0; i--)
+ if ((~op1_ptr[i] & op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+ count = res_size;
+ }
+ else
+ {
+ res_size = op2_size;
+
+ /* Copy the part of OP2 that stretches above OP1, to RES. */
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+ count = op1_size;
+ }
+
+ if (res_size != 0)
+ {
+ /* Second loop computes the real result. */
+ for (i = count - 1; i >= 0; i--)
+ res_ptr[i] = ~op1_ptr[i] & op2_ptr[i];
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+ }
+ else
+ {
+ res_ptr[0] = 1;
+ res_size = 1;
+ }
+
+ res->_mp_size = -res_size;
+ }
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/iset.c b/rts/gmp/mpz/iset.c
new file mode 100644
index 0000000000..114bc2d542
--- /dev/null
+++ b/rts/gmp/mpz/iset.c
@@ -0,0 +1,49 @@
+/* mpz_init_set (src_integer) -- Make a new multiple precision number with
+ a value copied from SRC_INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_init_set (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+
+ usize = u->_mp_size;
+ size = ABS (usize);
+
+ w->_mp_alloc = MAX (size, 1);
+ w->_mp_d = (mp_ptr) (*_mp_allocate_func) (w->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ w->_mp_size = usize;
+}
diff --git a/rts/gmp/mpz/iset_d.c b/rts/gmp/mpz/iset_d.c
new file mode 100644
index 0000000000..502a8933e2
--- /dev/null
+++ b/rts/gmp/mpz/iset_d.c
@@ -0,0 +1,39 @@
+/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double
+ value VAL.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set_d (mpz_ptr dest, double val)
+#else
+mpz_init_set_d (dest, val)
+ mpz_ptr dest;
+ double val;
+#endif
+{
+ dest->_mp_alloc = 1;
+ dest->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ dest->_mp_size = 0;
+ mpz_set_d (dest, val);
+}
diff --git a/rts/gmp/mpz/iset_si.c b/rts/gmp/mpz/iset_si.c
new file mode 100644
index 0000000000..842db140ef
--- /dev/null
+++ b/rts/gmp/mpz/iset_si.c
@@ -0,0 +1,49 @@
+/* mpz_init_set_si(val) -- Make a new multiple precision number with
+ value val.
+
+Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set_si (mpz_ptr x, signed long int val)
+#else
+mpz_init_set_si (x, val)
+ mpz_ptr x;
+ signed long int val;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ if (val > 0)
+ {
+ x->_mp_d[0] = val;
+ x->_mp_size = 1;
+ }
+ else if (val < 0)
+ {
+ x->_mp_d[0] = (unsigned long) -val;
+ x->_mp_size = -1;
+ }
+ else
+ x->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/iset_str.c b/rts/gmp/mpz/iset_str.c
new file mode 100644
index 0000000000..dfb8c6b230
--- /dev/null
+++ b/rts/gmp/mpz/iset_str.c
@@ -0,0 +1,47 @@
+/* mpz_init_set_str(string, base) -- Convert the \0-terminated string
+ STRING in base BASE to a multiple precision integer. Return a MP_INT
+ structure representing the integer. Allow white space in the
+ string. If BASE == 0 determine the base in the C standard way,
+ i.e. 0xhh...h means base 16, 0oo...o means base 8, otherwise
+ assume base 10.
+
+Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_init_set_str (mpz_ptr x, const char *str, int base)
+#else
+mpz_init_set_str (x, str, base)
+ mpz_ptr x;
+ const char *str;
+ int base;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+
+ /* if str has no digits mpz_set_str leaves x->_mp_size unset */
+ x->_mp_size = 0;
+
+ return mpz_set_str (x, str, base);
+}
diff --git a/rts/gmp/mpz/iset_ui.c b/rts/gmp/mpz/iset_ui.c
new file mode 100644
index 0000000000..759182c556
--- /dev/null
+++ b/rts/gmp/mpz/iset_ui.c
@@ -0,0 +1,39 @@
+/* mpz_init_set_ui(val) -- Make a new multiple precision number with
+ value val.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set_ui (mpz_ptr x, unsigned long int val)
+#else
+mpz_init_set_ui (x, val)
+ mpz_ptr x;
+ unsigned long int val;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ x->_mp_d[0] = val;
+ x->_mp_size = val != 0;
+}
diff --git a/rts/gmp/mpz/jacobi.c b/rts/gmp/mpz/jacobi.c
new file mode 100644
index 0000000000..9d49e1d0c6
--- /dev/null
+++ b/rts/gmp/mpz/jacobi.c
@@ -0,0 +1,53 @@
+/* mpz_jacobi (op1, op2).
+ Contributed by Bennet Yee (bsy) at Carnegie-Mellon University
+
+Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+
+/* Precondition: both p and q are positive */
+
+int
+#if __STDC__
+mpz_jacobi (mpz_srcptr pi, mpz_srcptr qi)
+#else
+mpz_jacobi (pi, qi)
+ mpz_srcptr pi, qi;
+#endif
+{
+#if GCDCHECK
+ int retval;
+ mpz_t gcdval;
+
+ mpz_init (gcdval);
+ mpz_gcd (gcdval, pi, qi);
+ if (!mpz_cmp_ui (gcdval, 1L))
+ {
+ /* J(ab,cb) = J(ab,c)J(ab,b) = J(ab,c)J(0,b) = J(ab,c)*0 */
+ retval = 0;
+ }
+ else
+ retval = mpz_legendre (pi, qi);
+ mpz_clear (gcdval);
+ return retval;
+#else
+ return mpz_legendre (pi, qi);
+#endif
+}
diff --git a/rts/gmp/mpz/kronsz.c b/rts/gmp/mpz/kronsz.c
new file mode 100644
index 0000000000..c8c6752224
--- /dev/null
+++ b/rts/gmp/mpz/kronsz.c
@@ -0,0 +1,126 @@
+/* mpz_si_kronecker -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+#if __STDC__
+mpz_si_kronecker (long a, mpz_srcptr b)
+#else
+mpz_si_kronecker (a, b)
+ long a;
+ mpz_srcptr b;
+#endif
+{
+ int b_abs_size;
+ mp_srcptr b_ptr;
+ mp_limb_t b_low;
+ int twos;
+ int result_bit1;
+
+ b_abs_size = ABSIZ (b);
+ if (b_abs_size == 0)
+ return JACOBI_S0 (a); /* (a/0) */
+
+ b_ptr = PTR(b);
+ b_low = b_ptr[0];
+
+ /* (0/b) = 1 if b=+/-1, 0 otherwise */
+ if (a == 0)
+ return (b_abs_size == 1) & (b_low == 1);
+
+ /* account for the effect of the sign of b, so can then ignore it */
+ result_bit1 = JACOBI_BSGN_SZ_BIT1 (a, b);
+
+ if ((b_low & 1) == 0)
+ {
+ /* b even */
+
+ if ((a & 1) == 0)
+ return 0; /* (a/b)=0 if both a,b even */
+
+ /* Require MP_BITS_PER_LIMB even, so that (a/2)^MP_BITS_PER_LIMB = 1,
+ and so that therefore there's no need to account for how many zero
+ limbs are stripped. */
+ ASSERT ((BITS_PER_MP_LIMB & 1) == 0);
+
+ MPN_STRIP_LOW_ZEROS_NOT_ZERO (b_ptr, b_abs_size);
+ b_low = b_ptr[0];
+
+ if ((b_low & 1) == 0)
+ {
+ /* odd a, even b */
+
+ mp_limb_t b_shl_bit1;
+
+ count_trailing_zeros (twos, b_low);
+
+ /* b_shl_bit1 is b>>twos, but with only bit 1 guaranteed */
+ if (twos == BITS_PER_MP_LIMB-1)
+ b_shl_bit1 = (b_abs_size == 1) ? 0 : (b_ptr[1] << 1);
+ else
+ b_shl_bit1 = (b_low >> twos);
+
+ result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_shl_bit1);
+ a = ABS(a);
+
+ if (a == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */
+
+ /* twos (a/2), reciprocity to (b/a), and (b/a) = (b mod a / b) */
+ return mpn_jacobi_base (mpn_mod_1_rshift (b_ptr, b_abs_size,
+ twos, a),
+ a,
+ result_bit1
+ ^ JACOBI_TWOS_U_BIT1 (twos, a)
+ ^ JACOBI_RECIP_UU_BIT1 (a, b_shl_bit1));
+ }
+ }
+
+ /* b odd */
+
+ result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
+ a = ABS(a);
+
+ /* (a/1) = 1 for any a */
+ if (b_abs_size == 1 && b_low == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1);
+
+ /* Note a is cast to unsigned because 0x80..00 doesn't fit in a signed. */
+ if ((a & 1) == 0)
+ {
+ count_trailing_zeros (twos, a);
+ a = ((unsigned long) a) >> twos;
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
+ }
+
+ if (a == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */
+
+ /* reciprocity to (b/a), and (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a), a,
+ result_bit1 ^ JACOBI_RECIP_UU_BIT1 (a, b_low));
+}
diff --git a/rts/gmp/mpz/kronuz.c b/rts/gmp/mpz/kronuz.c
new file mode 100644
index 0000000000..b877e6f64c
--- /dev/null
+++ b/rts/gmp/mpz/kronuz.c
@@ -0,0 +1,115 @@
+/* mpz_ui_kronecker -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+#if __STDC__
+mpz_ui_kronecker (unsigned long a, mpz_srcptr b)
+#else
+mpz_ui_kronecker (a, b)
+ unsigned long a;
+ mpz_srcptr b;
+#endif
+{
+ int b_abs_size;
+ mp_srcptr b_ptr;
+ mp_limb_t b_low;
+ int twos;
+ int result_bit1;
+
+ /* (a/0) */
+ b_abs_size = ABSIZ (b);
+ if (b_abs_size == 0)
+ return JACOBI_U0 (a);
+
+ /* (a/-1)=1 when a>=0, so the sign of b is ignored */
+ b_ptr = PTR(b);
+ b_low = b_ptr[0];
+
+ /* (0/1)=1; (0/-1)=1; (0/b)=0 for b!=+/-1
+ (1/b)=1, for any b */
+ if (a <= 1)
+ return (a == 1) | ((b_abs_size == 1) & (b_low == 1));
+
+ if (b_low & 1)
+ {
+ /* (a/1) = 1 for any a */
+ if (b_abs_size == 1 && b_low == 1)
+ return 1;
+
+ count_trailing_zeros (twos, a);
+ a >>= twos;
+ if (a == 1)
+ return JACOBI_TWOS_U (twos, b_low); /* powers of (2/b) only */
+
+ /* powers of (2/b); reciprocity to (b/a); (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a),
+ a,
+ JACOBI_TWOS_U_BIT1 (twos, b_low)
+ ^ JACOBI_RECIP_UU_BIT1 (b_low, a));
+ }
+
+ /* b is even; (a/2)=0 if a is even */
+ if ((a & 1) == 0)
+ return 0;
+
+ /* Require MP_BITS_PER_LIMB even, so (a/2)^MP_BITS_PER_LIMB = 1, and so we
+ don't have to pay attention to how many trailing zero limbs are
+ stripped. */
+ ASSERT ((BITS_PER_MP_LIMB & 1) == 0);
+
+ MPN_STRIP_LOW_ZEROS_NOT_ZERO (b_ptr, b_abs_size);
+ b_low = b_ptr[0];
+
+ if (b_low & 1)
+ /* reciprocity to (b/a); (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a),
+ a,
+ JACOBI_RECIP_UU_BIT1 (b_low, a));
+
+ count_trailing_zeros (twos, b_low);
+
+ /* reciprocity to get (b/a) */
+ if (twos == BITS_PER_MP_LIMB-1)
+ {
+ if (b_abs_size == 1)
+ {
+ /* b==0x800...00, one limb high bit only, so (a/2)^(BPML-1) */
+ return JACOBI_TWOS_U (BITS_PER_MP_LIMB-1, a);
+ }
+
+ /* b_abs_size > 1 */
+ result_bit1 = JACOBI_RECIP_UU_BIT1 (a, b_ptr[1] << 1);
+ }
+ else
+ result_bit1 = JACOBI_RECIP_UU_BIT1 (a, b_low >> twos);
+
+ /* powers of (a/2); reciprocity to (b/a); (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1_rshift (b_ptr, b_abs_size, twos, a),
+ a,
+ JACOBI_TWOS_U_BIT1 (twos, a) ^ result_bit1);
+}
diff --git a/rts/gmp/mpz/kronzs.c b/rts/gmp/mpz/kronzs.c
new file mode 100644
index 0000000000..edfb465976
--- /dev/null
+++ b/rts/gmp/mpz/kronzs.c
@@ -0,0 +1,74 @@
+/* mpz_kronecker_si -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function is expected to be often used with b odd, so there's a test
+ for this before invoking count_trailing_zeros().
+
+ After the absolute value of b is established it's treated as an unsigned
+ long, because 0x80..00 doesn't fit in a signed long. */
+
+int
+#if __STDC__
+mpz_kronecker_si (mpz_srcptr a, long b)
+#else
+mpz_kronecker_si (a, b)
+ mpz_srcptr a;
+ long b;
+#endif
+{
+ int result_bit1;
+ int twos;
+
+ if (b == 0)
+ return JACOBI_Z0 (a);
+
+ result_bit1 = JACOBI_BSGN_ZS_BIT1(a, b);
+ b = ABS (b);
+
+ if (b == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1); /* (a/1) = 1 for any a */
+
+ if (b & 1)
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, result_bit1);
+
+ /* result 0 if both a,b even */
+ if (mpz_even_p (a))
+ return 0;
+
+ /* (a/2)=(2/a) when a odd */
+ count_trailing_zeros (twos, b);
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(a)[0]);
+
+ b = ((unsigned long) b) >> twos;
+ if (b == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1);
+ else
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, result_bit1);
+}
+
+
diff --git a/rts/gmp/mpz/kronzu.c b/rts/gmp/mpz/kronzu.c
new file mode 100644
index 0000000000..749be5df07
--- /dev/null
+++ b/rts/gmp/mpz/kronzu.c
@@ -0,0 +1,66 @@
+/* mpz_kronecker_ui -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function is expected to be often used with b an odd prime, so the
+ code for odd b is nice and short. */
+
+int
+#if __STDC__
+mpz_kronecker_ui (mpz_srcptr a, unsigned long b)
+#else
+mpz_kronecker_ui (a, b)
+ mpz_srcptr a;
+ unsigned long b;
+#endif
+{
+ int twos;
+
+ if (b & 1)
+ {
+ if (b != 1)
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, 0);
+ else
+ return 1; /* (a/1)=1 for any a */
+ }
+
+ if (b == 0)
+ return JACOBI_Z0 (a);
+
+ /* (a/2)=0 if a even */
+ if (mpz_even_p (a))
+ return 0;
+
+ /* (a/2)=(2/a) when a odd */
+ count_trailing_zeros (twos, b);
+ b >>= twos;
+ if (b == 1)
+ return JACOBI_TWOS_U (twos, PTR(a)[0]);
+
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b,
+ JACOBI_TWOS_U_BIT1(twos, PTR(a)[0]));
+}
diff --git a/rts/gmp/mpz/lcm.c b/rts/gmp/mpz/lcm.c
new file mode 100644
index 0000000000..7495882ae5
--- /dev/null
+++ b/rts/gmp/mpz/lcm.c
@@ -0,0 +1,61 @@
+/* mpz/lcm.c: Calculate the least common multiple of two integers.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void *_mpz_realloc ();
+
+void
+#if __STDC__
+mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_lcm (r, u, v)
+ mpz_ptr r;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+{
+ mpz_t g;
+ mp_size_t usize, vsize, size;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ usize = ABS (SIZ (u));
+ vsize = ABS (SIZ (v));
+
+ if (usize == 0 || vsize == 0)
+ {
+ SIZ (r) = 0;
+ return;
+ }
+
+ size = MAX (usize, vsize);
+ MPZ_TMP_INIT (g, size);
+
+ mpz_gcd (g, u, v);
+ mpz_divexact (g, u, g);
+ mpz_mul (r, g, v);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/legendre.c b/rts/gmp/mpz/legendre.c
new file mode 100644
index 0000000000..ab665f70d0
--- /dev/null
+++ b/rts/gmp/mpz/legendre.c
@@ -0,0 +1,184 @@
+/* mpz_legendre (op1, op2).
+ Contributed by Bennet Yee (bsy) at Carnegie-Mellon University
+
+Copyright (C) 1992, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+
+#if defined (DEBUG)
+#include <stdio.h>
+#endif
+
+/* Precondition: both p and q are positive */
+
+int
+#if __STDC__
+mpz_legendre (mpz_srcptr pi, mpz_srcptr qi)
+#else
+mpz_legendre (pi, qi)
+mpz_srcptr pi, qi;
+#endif
+{
+ mpz_t p, q, qdiv2;
+#ifdef Q_MINUS_1
+ mpz_t q_minus_1;
+#endif
+ mpz_ptr mtmp;
+ register mpz_ptr pptr, qptr;
+ register int retval = 1;
+ register unsigned long int s;
+
+ pptr = p;
+ mpz_init_set (pptr, pi);
+ qptr = q;
+ mpz_init_set (qptr, qi);
+
+#ifdef Q_MINUS_1
+ mpz_init (q_minus_1);
+#endif
+ mpz_init (qdiv2);
+
+tail_recurse2:
+#ifdef DEBUG
+ printf ("tail_recurse2: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ s = mpz_scan1 (qptr, 0);
+ if (s) mpz_tdiv_q_2exp (qptr, qptr, s); /* J(a,2) = 1 */
+#ifdef DEBUG
+ printf ("2 factor decomposition: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ /* postcondition q odd */
+ if (!mpz_cmp_ui (qptr, 1L)) /* J(a,1) = 1 */
+ goto done;
+ mpz_mod (pptr, pptr, qptr); /* J(a,q) = J(b,q) when a == b mod q */
+#ifdef DEBUG
+ printf ("mod out by q: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ /* quick calculation to get approximate size first */
+ /* precondition: p < q */
+ if ((mpz_sizeinbase (pptr, 2) + 1 >= mpz_sizeinbase (qptr,2))
+ && (mpz_tdiv_q_2exp (qdiv2, qptr, 1L), mpz_cmp (pptr, qdiv2) > 0))
+ {
+ /* p > q/2 */
+ mpz_sub (pptr, qptr, pptr);
+ /* J(-1,q) = (-1)^((q-1)/2), q odd */
+ if (mpz_get_ui (qptr) & 2)
+ retval = -retval;
+ }
+ /* p < q/2 */
+#ifdef Q_MINUS_1
+ mpz_sub_ui (q_minus_q, qptr, 1L);
+#endif
+tail_recurse: /* we use tail_recurse only if q has not changed */
+#ifdef DEBUG
+ printf ("tail_recurse1: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ /*
+ * J(0,q) = 0
+ * this occurs only if gcd(p,q) != 1 which is never true for
+ * Legendre function.
+ */
+ if (!mpz_cmp_ui (pptr, 0L))
+ {
+ retval = 0;
+ goto done;
+ }
+
+ if (!mpz_cmp_ui (pptr, 1L))
+ {
+ /* J(1,q) = 1 */
+ /* retval *= 1; */
+ goto done;
+ }
+#ifdef Q_MINUS_1
+ if (!mpz_cmp (pptr, q_minus_1))
+ {
+ /* J(-1,q) = (-1)^((q-1)/2) */
+ if (mpz_get_ui (qptr) & 2)
+ retval = -retval;
+ /* else retval *= 1; */
+ goto done;
+ }
+#endif
+ /*
+ * we do not handle J(xy,q) except for x==2
+ * since we do not want to factor
+ */
+ if ((s = mpz_scan1 (pptr, 0)) != 0)
+ {
+ /*
+ * J(2,q) = (-1)^((q^2-1)/8)
+ *
+ * Note that q odd guarantees that q^2-1 is divisible by 8:
+ * Let a: q=2a+1. q^2 = 4a^2+4a+1, (q^2-1)/8 = a(a+1)/2, qed
+ *
+ * Now, note that this means that the low two bits of _a_
+ * (or the low bits of q shifted over by 1 determines
+ * the factor).
+ */
+ mpz_tdiv_q_2exp (pptr, pptr, s);
+
+ /* even powers of 2 gives J(2,q)^{2n} = 1 */
+ if (s & 1)
+ {
+ s = mpz_get_ui (qptr) >> 1;
+ s = s * (s + 1);
+ if (s & 2)
+ retval = -retval;
+ }
+ goto tail_recurse;
+ }
+ /*
+ * we know p is odd since we have cast out 2s
+ * precondition that q is odd guarantees both odd.
+ *
+ * quadratic reciprocity
+ * J(p,q) = (-1)^((p-1)(q-1)/4) * J(q,p)
+ */
+ if ((s = mpz_scan1 (pptr, 1)) <= 2 && (s + mpz_scan1 (qptr, 1)) <= 2)
+ retval = -retval;
+
+ mtmp = pptr; pptr = qptr; qptr = mtmp;
+ goto tail_recurse2;
+done:
+ mpz_clear (p);
+ mpz_clear (q);
+ mpz_clear (qdiv2);
+#ifdef Q_MINUS_1
+ mpz_clear (q_minus_1);
+#endif
+ return retval;
+}
diff --git a/rts/gmp/mpz/mod.c b/rts/gmp/mpz/mod.c
new file mode 100644
index 0000000000..87033b333b
--- /dev/null
+++ b/rts/gmp/mpz/mod.c
@@ -0,0 +1,63 @@
+/* mpz_mod -- The mathematical mod function.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_mod (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the remainder has been
+ preliminary calculated. We have to copy it to temporary space if it's
+ the same variable as REM. */
+ if (rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ mpz_tdiv_r (rem, dividend, divisor);
+
+ if (rem->_mp_size != 0)
+ {
+ if (dividend->_mp_size < 0)
+ if (divisor->_mp_size < 0)
+ mpz_sub (rem, rem, divisor);
+ else
+ mpz_add (rem, rem, divisor);
+ }
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/mul.c b/rts/gmp/mpz/mul.c
new file mode 100644
index 0000000000..7854788e50
--- /dev/null
+++ b/rts/gmp/mpz/mul.c
@@ -0,0 +1,131 @@
+/* mpz_mul -- Multiply two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_mul (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#else
+mult (u, v, w)
+ mpz_srcptr u;
+ mpz_srcptr v;
+ mpz_ptr w;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize = v->_mp_size;
+ mp_size_t wsize;
+ mp_size_t sign_product;
+ mp_ptr up, vp;
+ mp_ptr wp;
+ mp_ptr free_me = NULL;
+ size_t free_me_size;
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ sign_product = usize ^ vsize;
+ usize = ABS (usize);
+ vsize = ABS (vsize);
+
+ if (usize < vsize)
+ {
+ /* Swap U and V. */
+ {const __mpz_struct *t = u; u = v; v = t;}
+ {mp_size_t t = usize; usize = vsize; vsize = t;}
+ }
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ wp = w->_mp_d;
+
+ /* Ensure W has space enough to store the result. */
+ wsize = usize + vsize;
+ if (w->_mp_alloc < wsize)
+ {
+ if (wp == up || wp == vp)
+ {
+ free_me = wp;
+ free_me_size = w->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (wp, w->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ w->_mp_alloc = wsize;
+ wp = (mp_ptr) (*_mp_allocate_func) (wsize * BYTES_PER_MP_LIMB);
+ w->_mp_d = wp;
+ }
+ else
+ {
+ /* Make U and V not overlap with W. */
+ if (wp == up)
+ {
+ /* W and U are identical. Allocate temporary space for U. */
+ up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB);
+ /* Is V identical too? Keep it identical with U. */
+ if (wp == vp)
+ vp = up;
+ /* Copy to the temporary space. */
+ MPN_COPY (up, wp, usize);
+ }
+ else if (wp == vp)
+ {
+ /* W and V are identical. Allocate temporary space for V. */
+ vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB);
+ /* Copy to the temporary space. */
+ MPN_COPY (vp, wp, vsize);
+ }
+ }
+
+ if (vsize == 0)
+ {
+ wsize = 0;
+ }
+ else
+ {
+ cy_limb = mpn_mul (wp, up, usize, vp, vsize);
+ wsize = usize + vsize;
+ wsize -= cy_limb == 0;
+ }
+
+ w->_mp_size = sign_product < 0 ? -wsize : wsize;
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/mul_2exp.c b/rts/gmp/mpz/mul_2exp.c
new file mode 100644
index 0000000000..abea5fed2c
--- /dev/null
+++ b/rts/gmp/mpz/mul_2exp.c
@@ -0,0 +1,76 @@
+/* mpz_mul_2exp -- Multiply a bignum by 2**CNT
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
+#else
+mpz_mul_2exp (w, u, cnt)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t abs_usize = ABS (usize);
+ mp_size_t wsize;
+ mp_size_t limb_cnt;
+ mp_ptr wp;
+ mp_limb_t wlimb;
+
+ if (usize == 0)
+ {
+ w->_mp_size = 0;
+ return;
+ }
+
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+ wsize = abs_usize + limb_cnt + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ wp = w->_mp_d;
+ wsize = abs_usize + limb_cnt;
+
+ cnt %= BITS_PER_MP_LIMB;
+ if (cnt != 0)
+ {
+ wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt);
+ if (wlimb != 0)
+ {
+ wp[wsize] = wlimb;
+ wsize++;
+ }
+ }
+ else
+ {
+ MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize);
+ }
+
+ /* Zero all whole limbs at low end. Do it here and not before calling
+ mpn_lshift, not to lose for U == W. */
+ MPN_ZERO (wp, limb_cnt);
+
+ w->_mp_size = usize >= 0 ? wsize : -wsize;
+}
diff --git a/rts/gmp/mpz/mul_siui.c b/rts/gmp/mpz/mul_siui.c
new file mode 100644
index 0000000000..9849cd41b0
--- /dev/null
+++ b/rts/gmp/mpz/mul_siui.c
@@ -0,0 +1,81 @@
+/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to
+ MULTIPLICATOR times SMALL_MULTIPLICAND.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifdef OPERATION_mul_ui
+#define FUNCTION mpz_mul_ui
+#define MULTIPLICAND_UNSIGNED unsigned
+#define MULTIPLICAND_ABS(x) x
+#else
+#ifdef OPERATION_mul_si
+#define FUNCTION mpz_mul_si
+#define MULTIPLICAND_UNSIGNED
+#define MULTIPLICAND_ABS(x) ABS(x)
+#else
+Error, error, unrecognised OPERATION
+#endif
+#endif
+
+
+void
+#if __STDC__
+FUNCTION (mpz_ptr prod, mpz_srcptr mult,
+ MULTIPLICAND_UNSIGNED long int small_mult)
+#else
+FUNCTION (prod, mult, small_mult)
+ mpz_ptr prod;
+ mpz_srcptr mult;
+ MULTIPLICAND_UNSIGNED long int small_mult;
+#endif
+{
+ mp_size_t size = mult->_mp_size;
+ mp_size_t sign_product = size;
+ mp_limb_t cy;
+ mp_size_t prod_size;
+ mp_ptr prod_ptr;
+
+ if (size == 0 || small_mult == 0)
+ {
+ prod->_mp_size = 0;
+ return;
+ }
+ size = ABS (size);
+
+ prod_size = size + 1;
+ if (prod->_mp_alloc < prod_size)
+ _mpz_realloc (prod, prod_size);
+
+ prod_ptr = prod->_mp_d;
+
+ cy = mpn_mul_1 (prod_ptr, mult->_mp_d, size,
+ (mp_limb_t) MULTIPLICAND_ABS (small_mult));
+ if (cy != 0)
+ {
+ prod_ptr[size] = cy;
+ size++;
+ }
+
+ prod->_mp_size = ((sign_product < 0) ^ (small_mult < 0)) ? -size : size;
+}
diff --git a/rts/gmp/mpz/neg.c b/rts/gmp/mpz/neg.c
new file mode 100644
index 0000000000..566c3a95aa
--- /dev/null
+++ b/rts/gmp/mpz/neg.c
@@ -0,0 +1,53 @@
+/* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_neg (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_neg (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+
+ usize = u->_mp_size;
+
+ if (u != w)
+ {
+ size = ABS (usize);
+
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ }
+
+ w->_mp_size = -usize;
+}
diff --git a/rts/gmp/mpz/nextprime.c b/rts/gmp/mpz/nextprime.c
new file mode 100644
index 0000000000..f024dd1206
--- /dev/null
+++ b/rts/gmp/mpz/nextprime.c
@@ -0,0 +1,120 @@
+/* mpz_nextprime(p,t) - compute the next prime > t and store that in p.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_nextprime (mpz_ptr p, mpz_srcptr t)
+#else
+mpz_nextprime (p, t)
+ mpz_ptr p;
+ mpz_srcptr t;
+#endif
+{
+ mpz_add_ui (p, t, 1L);
+ while (! mpz_probab_prime_p (p, 5))
+ mpz_add_ui (p, p, 1L);
+}
+
+#if 0
+/* This code is not yet tested. Will be enabled in 3.1. */
+
+status unsigned short primes[] =
+{
+3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
+101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,
+191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,
+281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,
+389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,
+491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601,
+607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,
+719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,
+829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947,
+953,967,971,977,983,991,997
+};
+
+#define NUMBER_OF_PRIMES 167
+
+void
+#if __STDC__
+mpz_nextprime (mpz_ptr p, mpz_srcptr n)
+#else
+mpz_nextprime (p, n)
+ mpz_ptr p;
+ mpz_srcptr n;
+#endif
+{
+ mpz_t tmp;
+ unsigned short *moduli;
+ unsigned long difference;
+ int i;
+ int composite;
+
+ /* First handle tiny numbers */
+ if (mpz_cmp_ui (n, 2) < 0)
+ {
+ mpz_set_ui (p, 2);
+ return;
+ }
+ mpz_add_ui (p, n, 1);
+ mpz_setbit (p, 0);
+
+ if (mpz_cmp_ui (p, 7) <= 0)
+ return;
+
+ prime_limit = NUMBER_OF_PRIMES - 1;
+ if (mpz_cmp_ui (p, primes[prime_limit]) <= 0)
+ /* Just use first three entries (3,5,7) of table for small numbers */
+ prime_limit = 3;
+ if (prime_limit)
+ {
+ /* Compute residues modulo small odd primes */
+ moduli = (unsigned short *) TMP_ALLOC (prime_limit * sizeof moduli[0]);
+ for (i = 0; i < prime_limit; i++)
+ moduli[i] = mpz_fdiv_ui (p, primes[i]);
+ }
+ for (difference = 0; ; difference += 2)
+ {
+ composite = 0;
+
+ /* First check residues */
+ for (i = 0; i < prime_limit; i++)
+ {
+ int acc, pr;
+ composite |= (moduli[i] == 0);
+ acc = moduli[i] + 2;
+ pr = primes[i];
+ moduli[i] = acc >= pr ? acc - pr : acc;
+ }
+ if (composite)
+ continue;
+
+ mpz_add_ui (p, p, difference);
+ difference = 0;
+
+ /* Miller-Rabin test */
+ if (mpz_millerrabin (p, 2))
+ break;
+ }
+}
+#endif
diff --git a/rts/gmp/mpz/out_raw.c b/rts/gmp/mpz/out_raw.c
new file mode 100644
index 0000000000..62709479c5
--- /dev/null
+++ b/rts/gmp/mpz/out_raw.c
@@ -0,0 +1,89 @@
+/* mpz_out_raw -- Output a mpz_t in binary. Use an endianess and word size
+ independent format.
+
+Copyright (C) 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+#if __STDC__
+mpz_out_raw (FILE *stream, mpz_srcptr x)
+#else
+mpz_out_raw (stream, x)
+ FILE *stream;
+ mpz_srcptr x;
+#endif
+{
+ int i;
+ mp_size_t s;
+ mp_size_t xsize = ABS (x->_mp_size);
+ mp_srcptr xp = x->_mp_d;
+ mp_size_t out_bytesize;
+ mp_limb_t hi_limb;
+ int n_bytes_in_hi_limb;
+
+ if (stream == 0)
+ stream = stdout;
+
+ if (xsize == 0)
+ {
+ for (i = 4 - 1; i >= 0; i--)
+ fputc (0, stream);
+ return ferror (stream) ? 0 : 4;
+ }
+
+ hi_limb = xp[xsize - 1];
+ for (i = BYTES_PER_MP_LIMB - 1; i > 0; i--)
+ {
+ if ((hi_limb >> i * BITS_PER_CHAR) != 0)
+ break;
+ }
+ n_bytes_in_hi_limb = i + 1;
+ out_bytesize = BYTES_PER_MP_LIMB * (xsize - 1) + n_bytes_in_hi_limb;
+ if (x->_mp_size < 0)
+ out_bytesize = -out_bytesize;
+
+ /* Make the size 4 bytes on all machines, to make the format portable. */
+ for (i = 4 - 1; i >= 0; i--)
+ fputc ((out_bytesize >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR),
+ stream);
+
+ /* Output from the most significant limb to the least significant limb,
+ with each limb also output in decreasing significance order. */
+
+ /* Output the most significant limb separately, since we will only
+ output some of its bytes. */
+ for (i = n_bytes_in_hi_limb - 1; i >= 0; i--)
+ fputc ((hi_limb >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), stream);
+
+ /* Output the remaining limbs. */
+ for (s = xsize - 2; s >= 0; s--)
+ {
+ mp_limb_t x_limb;
+
+ x_limb = xp[s];
+ for (i = BYTES_PER_MP_LIMB - 1; i >= 0; i--)
+ fputc ((x_limb >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), stream);
+ }
+ return ferror (stream) ? 0 : ABS (out_bytesize) + 4;
+}
diff --git a/rts/gmp/mpz/out_str.c b/rts/gmp/mpz/out_str.c
new file mode 100644
index 0000000000..bf971b0057
--- /dev/null
+++ b/rts/gmp/mpz/out_str.c
@@ -0,0 +1,108 @@
+/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.
+ integer INTEGER in base BASE.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+#if __STDC__
+mpz_out_str (FILE *stream, int base, mpz_srcptr x)
+#else
+mpz_out_str (stream, base, x)
+ FILE *stream;
+ int base;
+ mpz_srcptr x;
+#endif
+{
+ mp_ptr xp;
+ mp_size_t x_size = x->_mp_size;
+ unsigned char *str;
+ size_t str_size;
+ size_t i;
+ size_t written;
+ char *num_to_text;
+ TMP_DECL (marker);
+
+ if (stream == 0)
+ stream = stdout;
+
+ if (base >= 0)
+ {
+ if (base == 0)
+ base = 10;
+ num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+ }
+ else
+ {
+ base = -base;
+ num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ }
+
+ if (x_size == 0)
+ {
+ fputc ('0', stream);
+ return ferror (stream) ? 0 : 1;
+ }
+
+ written = 0;
+
+ if (x_size < 0)
+ {
+ fputc ('-', stream);
+ x_size = -x_size;
+ written = 1;
+ }
+
+ TMP_MARK (marker);
+ str_size = ((size_t) (x_size * BITS_PER_MP_LIMB
+ * __mp_bases[base].chars_per_bit_exactly)) + 3;
+ str = (unsigned char *) TMP_ALLOC (str_size);
+
+ /* Move the number to convert into temporary space, since mpn_get_str
+ clobbers its argument + needs one extra high limb.... */
+ xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (xp, x->_mp_d, x_size);
+
+ str_size = mpn_get_str (str, base, xp, x_size);
+
+ /* mpn_get_str might make some leading zeros. Skip them. */
+ while (*str == 0)
+ {
+ str_size--;
+ str++;
+ }
+
+ /* Translate to printable chars. */
+ for (i = 0; i < str_size; i++)
+ str[i] = num_to_text[str[i]];
+ str[str_size] = 0;
+
+ {
+ size_t fwret;
+ fwret = fwrite ((char *) str, 1, str_size, stream);
+ written += fwret;
+ }
+
+ TMP_FREE (marker);
+ return ferror (stream) ? 0 : written;
+}
diff --git a/rts/gmp/mpz/perfpow.c b/rts/gmp/mpz/perfpow.c
new file mode 100644
index 0000000000..e71670a0be
--- /dev/null
+++ b/rts/gmp/mpz/perfpow.c
@@ -0,0 +1,272 @@
+/* mpz_perfect_power_p(arg) -- Return non-zero if ARG is a perfect power,
+ zero otherwise.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ We are to determine if c is a perfect power, c = a ^ b.
+ Assume c is divisible by 2^n and that codd = c/2^n is odd.
+ Assume a is divisible by 2^m and that aodd = a/2^m is odd.
+ It is always true that m divides n.
+
+ * If n is prime, either 1) a is 2*aodd and b = n
+ or 2) a = c and b = 1.
+ So for n prime, we readily have a solution.
+ * If n is factorable into the non-trivial factors p1,p2,...
+ Since m divides n, m has a subset of n's factors and b = n / m.
+
+ BUG: Should handle negative numbers, since they can be odd perfect powers.
+*/
+
+/* This is a naive approach to recognizing perfect powers.
+ Many things can be improved. In particular, we should use p-adic
+ arithmetic for computing possible roots. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static unsigned long int gcd _PROTO ((unsigned long int a, unsigned long int b));
+static int isprime _PROTO ((unsigned long int t));
+
+static const unsigned short primes[] =
+{ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
+ 59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131,
+ 137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,
+ 227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,
+ 313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,
+ 419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,
+ 509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,
+ 617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,
+ 727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,
+ 829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,
+ 947,953,967,971,977,983,991,997,0
+};
+#define SMALLEST_OMITTED_PRIME 1009
+
+
+int
+#if __STDC__
+mpz_perfect_power_p (mpz_srcptr u)
+#else
+mpz_perfect_power_p (u)
+ mpz_srcptr u;
+#endif
+{
+ unsigned long int prime;
+ unsigned long int n, n2;
+ int i;
+ unsigned long int rem;
+ mpz_t u2, q;
+ int exact;
+ mp_size_t uns;
+ TMP_DECL (marker);
+
+ if (mpz_cmp_ui (u, 1) <= 0)
+ return 0;
+
+ n2 = mpz_scan1 (u, 0);
+ if (n2 == 1)
+ return 0;
+
+ TMP_MARK (marker);
+
+ uns = ABSIZ (u) - n2 / BITS_PER_MP_LIMB;
+ MPZ_TMP_INIT (q, uns);
+ MPZ_TMP_INIT (u2, uns);
+
+ mpz_tdiv_q_2exp (u2, u, n2);
+
+ if (isprime (n2))
+ goto n2prime;
+
+ for (i = 1; primes[i] != 0; i++)
+ {
+ prime = primes[i];
+ rem = mpz_tdiv_ui (u2, prime);
+ if (rem == 0) /* divisable? */
+ {
+ rem = mpz_tdiv_q_ui (q, u2, prime * prime);
+ if (rem != 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+ mpz_swap (q, u2);
+ for (n = 2;;)
+ {
+ rem = mpz_tdiv_q_ui (q, u2, prime);
+ if (rem != 0)
+ break;
+ mpz_swap (q, u2);
+ n++;
+ }
+
+ n2 = gcd (n2, n);
+ if (n2 == 1)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+
+ /* As soon as n2 becomes a prime number, stop factoring.
+ Either we have u=x^n2 or u is not a perfect power. */
+ if (isprime (n2))
+ goto n2prime;
+ }
+ }
+
+ if (mpz_cmp_ui (u2, 1) == 0)
+ {
+ TMP_FREE (marker);
+ return 1;
+ }
+
+ if (n2 == 0)
+ {
+ unsigned long int nth;
+ /* We did not find any factors above. We have to consider all values
+ of n. */
+ for (nth = 2;; nth++)
+ {
+ if (! isprime (nth))
+ continue;
+#if 0
+ exact = mpz_padic_root (q, u2, nth, PTH);
+ if (exact)
+#endif
+ exact = mpz_root (q, u2, nth);
+ if (exact)
+ {
+ TMP_FREE (marker);
+ return 1;
+ }
+ if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+ }
+ }
+ else
+ {
+ unsigned long int nth;
+ /* We found some factors above. We just need to consider values of n
+ that divides n2. */
+ for (nth = 2; nth <= n2; nth++)
+ {
+ if (! isprime (nth))
+ continue;
+ if (n2 % nth != 0)
+ continue;
+#if 0
+ exact = mpz_padic_root (q, u2, nth, PTH);
+ if (exact)
+#endif
+ exact = mpz_root (q, u2, nth);
+ if (exact)
+ {
+ TMP_FREE (marker);
+ return 1;
+ }
+ if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+ }
+
+ TMP_FREE (marker);
+ return 0;
+ }
+
+n2prime:
+ exact = mpz_root (NULL, u2, n2);
+ TMP_FREE (marker);
+ return exact;
+}
+
+static unsigned long int
+#if __STDC__
+gcd (unsigned long int a, unsigned long int b)
+#else
+gcd (a, b)
+ unsigned long int a, b;
+#endif
+{
+ int an2, bn2, n2;
+
+ if (a == 0)
+ return b;
+ if (b == 0)
+ return a;
+
+ count_trailing_zeros (an2, a);
+ a >>= an2;
+
+ count_trailing_zeros (bn2, b);
+ b >>= bn2;
+
+ n2 = MIN (an2, bn2);
+
+ while (a != b)
+ {
+ if (a > b)
+ {
+ a -= b;
+ do
+ a >>= 1;
+ while ((a & 1) == 0);
+ }
+ else /* b > a. */
+ {
+ b -= a;
+ do
+ b >>= 1;
+ while ((b & 1) == 0);
+ }
+ }
+
+ return a << n2;
+}
+
+static int
+#if __STDC__
+isprime (unsigned long int t)
+#else
+isprime (t)
+ unsigned long int t;
+#endif
+{
+ unsigned long int q, r, d;
+
+ if (t < 3 || (t & 1) == 0)
+ return t == 2;
+
+ for (d = 3, r = 1; r != 0; d += 2)
+ {
+ q = t / d;
+ r = t - q * d;
+ if (q < d)
+ return 1;
+ }
+ return 0;
+}
diff --git a/rts/gmp/mpz/perfsqr.c b/rts/gmp/mpz/perfsqr.c
new file mode 100644
index 0000000000..92e8d08ea9
--- /dev/null
+++ b/rts/gmp/mpz/perfsqr.c
@@ -0,0 +1,45 @@
+/* mpz_perfect_square_p(arg) -- Return non-zero if ARG is a perfect square,
+ zero otherwise.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_perfect_square_p (mpz_srcptr a)
+#else
+mpz_perfect_square_p (a)
+ mpz_srcptr a;
+#endif
+{
+ mp_size_t asize = a->_mp_size;
+
+ /* No negative numbers are perfect squares. */
+ if (asize < 0)
+ return 0;
+
+ /* Zero is a perfect square. */
+ if (asize == 0)
+ return 1;
+
+ return mpn_perfect_square_p (a->_mp_d, asize);
+}
diff --git a/rts/gmp/mpz/popcount.c b/rts/gmp/mpz/popcount.c
new file mode 100644
index 0000000000..3105258e26
--- /dev/null
+++ b/rts/gmp/mpz/popcount.c
@@ -0,0 +1,42 @@
+/* mpz_popcount(mpz_ptr op) -- Population count of OP. If the operand is
+ negative, return ~0 (a novel representation of infinity).
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_popcount (mpz_srcptr u)
+#else
+mpz_popcount (u)
+ mpz_srcptr u;
+#endif
+{
+ mp_size_t usize;
+
+ usize = u->_mp_size;
+
+ if ((usize) < 0)
+ return ~ (unsigned long int) 0;
+
+ return mpn_popcount (u->_mp_d, usize);
+}
diff --git a/rts/gmp/mpz/pow_ui.c b/rts/gmp/mpz/pow_ui.c
new file mode 100644
index 0000000000..96ca114e4d
--- /dev/null
+++ b/rts/gmp/mpz/pow_ui.c
@@ -0,0 +1,129 @@
+/* mpz_pow_ui(res, base, exp) -- Set RES to BASE**EXP.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_pow_ui (mpz_ptr r, mpz_srcptr b, unsigned long int e)
+#else
+mpz_pow_ui (r, b, e)
+ mpz_ptr r;
+ mpz_srcptr b;
+ unsigned long int e;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+rpow (const MINT *b, signed short int e, MINT *r)
+#else
+rpow (b, e, r)
+ const MINT *b;
+ signed short int e;
+ MINT *r;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_ptr rp, bp, tp, xp;
+ mp_size_t ralloc, rsize, bsize;
+ int cnt, i;
+ mp_limb_t blimb;
+ TMP_DECL (marker);
+
+ bsize = ABS (b->_mp_size);
+
+ /* Single out cases that give result == 0 or 1. These tests are here
+ to simplify the general code below, not to optimize. */
+ if (e == 0)
+ {
+ r->_mp_d[0] = 1;
+ r->_mp_size = 1;
+ return;
+ }
+ if (bsize == 0
+#ifdef BERKELEY_MP
+ || e < 0
+#endif
+ )
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ bp = b->_mp_d;
+
+ blimb = bp[bsize - 1];
+ if (bsize == 1 && blimb < 0x100)
+ {
+ /* Estimate space requirements accurately. Using the code from the
+ `else' path would over-estimate space requirements wildly. */
+ float lb = __mp_bases[blimb].chars_per_bit_exactly;
+ ralloc = 3 + ((mp_size_t) (e / lb) / BITS_PER_MP_LIMB);
+ }
+ else
+ {
+ /* Over-estimate space requirements somewhat. */
+ count_leading_zeros (cnt, blimb);
+ ralloc = bsize * e - cnt * e / BITS_PER_MP_LIMB + 2;
+ }
+
+ TMP_MARK (marker);
+
+ /* The two areas are used to alternatingly hold the input and recieve the
+ product for mpn_mul. (This scheme is used to fulfill the requirements
+ of mpn_mul; that the product space may not be the same as any of the
+ input operands.) */
+ rp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+ tp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+
+ MPN_COPY (rp, bp, bsize);
+ rsize = bsize;
+ count_leading_zeros (cnt, e);
+
+ for (i = BITS_PER_MP_LIMB - cnt - 2; i >= 0; i--)
+ {
+ mpn_mul_n (tp, rp, rp, rsize);
+ rsize = 2 * rsize;
+ rsize -= tp[rsize - 1] == 0;
+ xp = tp; tp = rp; rp = xp;
+
+ if ((e & ((mp_limb_t) 1 << i)) != 0)
+ {
+ rsize = rsize + bsize - (mpn_mul (tp, rp, rsize, bp, bsize) == 0);
+ xp = tp; tp = rp; rp = xp;
+ }
+ }
+
+ /* Now then we know the exact space requirements, reallocate if
+ necessary. */
+ if (r->_mp_alloc < rsize)
+ _mpz_realloc (r, rsize);
+
+ MPN_COPY (r->_mp_d, rp, rsize);
+ r->_mp_size = (e & 1) == 0 || b->_mp_size >= 0 ? rsize : -rsize;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/powm.c b/rts/gmp/mpz/powm.c
new file mode 100644
index 0000000000..e6af855a71
--- /dev/null
+++ b/rts/gmp/mpz/powm.c
@@ -0,0 +1,364 @@
+/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+/* set c <- (a*b)/R^n mod m c has to have at least (2n) allocated limbs */
+static void
+#if __STDC__
+mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim)
+#else
+mpz_redc (c, a, b, m, Nprim)
+ mpz_ptr c;
+ mpz_srcptr a;
+ mpz_srcptr b;
+ mpz_srcptr m;
+ mp_limb_t Nprim;
+#endif
+{
+ mp_ptr cp, mp = PTR (m);
+ mp_limb_t cy, cout = 0;
+ mp_limb_t q;
+ size_t j, n = ABSIZ (m);
+
+ ASSERT (ALLOC (c) >= 2 * n);
+
+ mpz_mul (c, a, b);
+ cp = PTR (c);
+ j = ABSIZ (c);
+ MPN_ZERO (cp + j, 2 * n - j);
+ for (j = 0; j < n; j++)
+ {
+ q = cp[0] * Nprim;
+ cy = mpn_addmul_1 (cp, mp, n, q);
+ cout += mpn_add_1 (cp + n, cp + n, n - j, cy);
+ cp++;
+ }
+ cp -= n;
+ if (cout)
+ {
+ cy = cout - mpn_sub_n (cp, cp + n, mp, n);
+ while (cy)
+ cy -= mpn_sub_n (cp, cp, mp, n);
+ }
+ else
+ MPN_COPY (cp, cp + n, n);
+ MPN_NORMALIZE (cp, n);
+ SIZ (c) = SIZ (c) < 0 ? -n : n;
+}
+
+/* average number of calls to redc for an exponent of n bits
+ with the sliding window algorithm of base 2^k: the optimal is
+ obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
+
+ n\k 4 5 6 7 8
+ 128 156* 159 171 200 261
+ 256 309 307* 316 343 403
+ 512 617 607* 610 632 688
+ 1024 1231 1204 1195* 1207 1256
+ 2048 2461 2399 2366 2360* 2396
+ 4096 4918 4787 4707 4665* 4670
+*/
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod)
+#else
+mpz_powm (res, base, e, mod)
+ mpz_ptr res;
+ mpz_srcptr base;
+ mpz_srcptr e;
+ mpz_srcptr mod;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res)
+#else
+pow (base, e, mod, res)
+ mpz_srcptr base;
+ mpz_srcptr e;
+ mpz_srcptr mod;
+ mpz_ptr res;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_limb_t invm, *ep, c, mask;
+ mpz_t xx, *g;
+ mp_size_t n, i, K, j, l, k;
+ int sh;
+ int use_redc;
+
+#ifdef POWM_DEBUG
+ mpz_t exp;
+ mpz_init (exp);
+#endif
+
+ n = ABSIZ (mod);
+
+ if (n == 0)
+ DIVIDE_BY_ZERO;
+
+ if (SIZ (e) == 0)
+ {
+ /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+ depending on if MOD equals 1. */
+ SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1;
+ PTR(res)[0] = 1;
+ return;
+ }
+
+ /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.
+ In REDC each modular multiplication costs about 2*n^2 limbs operations,
+ whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a
+ multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
+ for example using Burnikel-Ziegler's algorithm. This gives a theoretical
+ threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
+ 2.66. */
+ /* For now, also disable REDC when MOD is even, as the inverse can't
+ handle that. */
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3)
+#endif
+
+ use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0);
+ if (use_redc)
+ {
+ /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
+ modlimb_invert (invm, PTR(mod)[0]);
+ invm = -invm;
+ }
+
+ /* determines optimal value of k */
+ l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */
+ k = 1;
+ K = 2;
+ while (2 * l > K * (2 + k * (3 + k)))
+ {
+ k++;
+ K *= 2;
+ }
+
+ g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t));
+ /* compute x*R^n where R=2^BITS_PER_MP_LIMB */
+ mpz_init (g[0]);
+ if (use_redc)
+ {
+ mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB);
+ mpz_mod (g[0], g[0], mod);
+ }
+ else
+ mpz_mod (g[0], base, mod);
+
+ /* compute xx^g for odd g < 2^k */
+ mpz_init (xx);
+ if (use_redc)
+ {
+ _mpz_realloc (xx, 2 * n);
+ mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */
+ }
+ else
+ {
+ mpz_mul (xx, g[0], g[0]);
+ mpz_mod (xx, xx, mod);
+ }
+ for (i = 1; i < K / 2; i++)
+ {
+ mpz_init (g[i]);
+ if (use_redc)
+ {
+ _mpz_realloc (g[i], 2 * n);
+ mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */
+ }
+ else
+ {
+ mpz_mul (g[i], g[i - 1], xx);
+ mpz_mod (g[i], g[i], mod);
+ }
+ }
+
+ /* now starts the real stuff */
+ mask = (mp_limb_t) ((1<<k) - 1);
+ ep = PTR (e);
+ i = ABSIZ (e) - 1; /* current index */
+ c = ep[i]; /* current limb */
+ count_leading_zeros (sh, c);
+ sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */
+ sh -= k; /* index of lower bit of ep[i] to take into account */
+ if (sh < 0)
+ { /* k-sh extra bits are needed */
+ if (i > 0)
+ {
+ i--;
+ c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
+ sh += BITS_PER_MP_LIMB;
+ }
+ }
+ else
+ c = c >> sh;
+#ifdef POWM_DEBUG
+ printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm);
+ mpz_set_ui (exp, c);
+#endif
+ j=0;
+ while (c % 2 == 0)
+ {
+ j++;
+ c = (c >> 1);
+ }
+ mpz_set (xx, g[c >> 1]);
+ while (j--)
+ {
+ if (use_redc)
+ mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ }
+
+#ifdef POWM_DEBUG
+ printf ("x^"); mpz_out_str (0, 10, exp);
+ printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
+ putchar ('\n');
+#endif
+
+ while (i > 0 || sh > 0)
+ {
+ c = ep[i];
+ sh -= k;
+ l = k; /* number of bits treated */
+ if (sh < 0)
+ {
+ if (i > 0)
+ {
+ i--;
+ c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
+ sh += BITS_PER_MP_LIMB;
+ }
+ else
+ {
+ l += sh; /* may be less bits than k here */
+ c = c & ((1<<l) - 1);
+ }
+ }
+ else
+ c = c >> sh;
+ c = c & mask;
+
+ /* this while loop implements the sliding window improvement */
+ while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0))
+ {
+ if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ if (sh)
+ {
+ sh--;
+ c = (c<<1) + ((ep[i]>>sh) & 1);
+ }
+ else
+ {
+ i--;
+ sh = BITS_PER_MP_LIMB - 1;
+ c = (c<<1) + (ep[i]>>sh);
+ }
+ }
+
+#ifdef POWM_DEBUG
+ printf ("l=%u c=%lu\n", l, c);
+ mpz_mul_2exp (exp, exp, k);
+ mpz_add_ui (exp, exp, c);
+#endif
+
+ /* now replace xx by xx^(2^k)*x^c */
+ if (c != 0)
+ {
+ j = 0;
+ while (c % 2 == 0)
+ {
+ j++;
+ c = c >> 1;
+ }
+ /* c0 = c * 2^j, i.e. xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */
+ l -= j;
+ while (l--)
+ if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ if (use_redc)
+ mpz_redc (xx, xx, g[c >> 1], mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, g[c >> 1]);
+ mpz_mod (xx, xx, mod);
+ }
+ }
+ else
+ j = l; /* case c=0 */
+ while (j--)
+ {
+ if (use_redc)
+ mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ }
+#ifdef POWM_DEBUG
+ printf ("x^"); mpz_out_str (0, 10, exp);
+ printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
+ putchar ('\n');
+#endif
+ }
+
+ /* now convert back xx to xx/R^n */
+ if (use_redc)
+ {
+ mpz_set_ui (g[0], 1);
+ mpz_redc (xx, xx, g[0], mod, invm);
+ if (mpz_cmp (xx, mod) >= 0)
+ mpz_sub (xx, xx, mod);
+ }
+ mpz_set (res, xx);
+
+ mpz_clear (xx);
+ for (i = 0; i < K / 2; i++)
+ mpz_clear (g[i]);
+ (*_mp_free_func) (g, K / 2 * sizeof (mpz_t));
+}
diff --git a/rts/gmp/mpz/powm_ui.c b/rts/gmp/mpz/powm_ui.c
new file mode 100644
index 0000000000..00f70bd563
--- /dev/null
+++ b/rts/gmp/mpz/powm_ui.c
@@ -0,0 +1,248 @@
+/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_powm_ui (mpz_ptr res, mpz_srcptr base, unsigned long int exp, mpz_srcptr mod)
+#else
+mpz_powm_ui (res, base, exp, mod)
+ mpz_ptr res;
+ mpz_srcptr base;
+ unsigned long int exp;
+ mpz_srcptr mod;
+#endif
+{
+ mp_ptr rp, mp, bp;
+ mp_size_t msize, bsize, rsize;
+ mp_size_t size;
+ int mod_shift_cnt;
+ int negative_result;
+ mp_limb_t *free_me = NULL;
+ size_t free_me_size;
+ TMP_DECL (marker);
+
+ msize = ABS (mod->_mp_size);
+ size = 2 * msize;
+
+ rp = res->_mp_d;
+
+ if (msize == 0)
+ DIVIDE_BY_ZERO;
+
+ if (exp == 0)
+ {
+ /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+ depending on if MOD equals 1. */
+ res->_mp_size = (msize == 1 && (mod->_mp_d)[0] == 1) ? 0 : 1;
+ rp[0] = 1;
+ return;
+ }
+
+ TMP_MARK (marker);
+
+ /* Normalize MOD (i.e. make its most significant bit set) as required by
+ mpn_divmod. This will make the intermediate values in the calculation
+ slightly larger, but the correct result is obtained after a final
+ reduction using the original MOD value. */
+
+ mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB);
+ count_leading_zeros (mod_shift_cnt, mod->_mp_d[msize - 1]);
+ if (mod_shift_cnt != 0)
+ mpn_lshift (mp, mod->_mp_d, msize, mod_shift_cnt);
+ else
+ MPN_COPY (mp, mod->_mp_d, msize);
+
+ bsize = ABS (base->_mp_size);
+ if (bsize > msize)
+ {
+ /* The base is larger than the module. Reduce it. */
+
+ /* Allocate (BSIZE + 1) with space for remainder and quotient.
+ (The quotient is (bsize - msize + 1) limbs.) */
+ bp = (mp_ptr) TMP_ALLOC ((bsize + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (bp, base->_mp_d, bsize);
+ /* We don't care about the quotient, store it above the remainder,
+ at BP + MSIZE. */
+ mpn_divmod (bp + msize, bp, bsize, mp, msize);
+ bsize = msize;
+ /* Canonicalize the base, since we are going to multiply with it
+ quite a few times. */
+ MPN_NORMALIZE (bp, bsize);
+ }
+ else
+ bp = base->_mp_d;
+
+ if (bsize == 0)
+ {
+ res->_mp_size = 0;
+ TMP_FREE (marker);
+ return;
+ }
+
+ if (res->_mp_alloc < size)
+ {
+ /* We have to allocate more space for RES. If any of the input
+ parameters are identical to RES, defer deallocation of the old
+ space. */
+
+ if (rp == mp || rp == bp)
+ {
+ free_me = rp;
+ free_me_size = res->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (rp, res->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ rp = (mp_ptr) (*_mp_allocate_func) (size * BYTES_PER_MP_LIMB);
+ res->_mp_alloc = size;
+ res->_mp_d = rp;
+ }
+ else
+ {
+ /* Make BASE, EXP and MOD not overlap with RES. */
+ if (rp == bp)
+ {
+ /* RES and BASE are identical. Allocate temp. space for BASE. */
+ bp = (mp_ptr) TMP_ALLOC (bsize * BYTES_PER_MP_LIMB);
+ MPN_COPY (bp, rp, bsize);
+ }
+ if (rp == mp)
+ {
+ /* RES and MOD are identical. Allocate temporary space for MOD. */
+ mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB);
+ MPN_COPY (mp, rp, msize);
+ }
+ }
+
+ MPN_COPY (rp, bp, bsize);
+ rsize = bsize;
+
+ {
+ mp_ptr xp = (mp_ptr) TMP_ALLOC (2 * (msize + 1) * BYTES_PER_MP_LIMB);
+ int c;
+ mp_limb_t e;
+ mp_limb_t carry_limb;
+
+ negative_result = (exp & 1) && base->_mp_size < 0;
+
+ e = exp;
+ count_leading_zeros (c, e);
+ e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
+ c = BITS_PER_MP_LIMB - 1 - c;
+
+ /* Main loop.
+
+ Make the result be pointed to alternately by XP and RP. This
+ helps us avoid block copying, which would otherwise be necessary
+ with the overlap restrictions of mpn_divmod. With 50% probability
+ the result after this loop will be in the area originally pointed
+ by RP (==RES->_mp_d), and with 50% probability in the area originally
+ pointed to by XP. */
+
+ while (c != 0)
+ {
+ mp_ptr tp;
+ mp_size_t xsize;
+
+ mpn_mul_n (xp, rp, rp, rsize);
+ xsize = 2 * rsize;
+ xsize -= xp[xsize - 1] == 0;
+ if (xsize > msize)
+ {
+ mpn_divmod (xp + msize, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp; rp = xp; xp = tp;
+ rsize = xsize;
+
+ if ((mp_limb_signed_t) e < 0)
+ {
+ mpn_mul (xp, rp, rsize, bp, bsize);
+ xsize = rsize + bsize;
+ xsize -= xp[xsize - 1] == 0;
+ if (xsize > msize)
+ {
+ mpn_divmod (xp + msize, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp; rp = xp; xp = tp;
+ rsize = xsize;
+ }
+ e <<= 1;
+ c--;
+ }
+
+ /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+ steps. Adjust the result by reducing it with the original MOD.
+
+ Also make sure the result is put in RES->_mp_d (where it already
+ might be, see above). */
+
+ if (mod_shift_cnt != 0)
+ {
+ carry_limb = mpn_lshift (res->_mp_d, rp, rsize, mod_shift_cnt);
+ rp = res->_mp_d;
+ if (carry_limb != 0)
+ {
+ rp[rsize] = carry_limb;
+ rsize++;
+ }
+ }
+ else
+ {
+ MPN_COPY (res->_mp_d, rp, rsize);
+ rp = res->_mp_d;
+ }
+
+ if (rsize >= msize)
+ {
+ mpn_divmod (rp + msize, rp, rsize, mp, msize);
+ rsize = msize;
+ }
+
+ /* Remove any leading zero words from the result. */
+ if (mod_shift_cnt != 0)
+ mpn_rshift (rp, rp, rsize, mod_shift_cnt);
+ MPN_NORMALIZE (rp, rsize);
+ }
+
+ if (negative_result && rsize != 0)
+ {
+ if (mod_shift_cnt != 0)
+ mpn_rshift (mp, mp, msize, mod_shift_cnt);
+ mpn_sub (rp, mp, msize, rp, rsize);
+ rsize = msize;
+ MPN_NORMALIZE (rp, rsize);
+ }
+ res->_mp_size = rsize;
+
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/pprime_p.c b/rts/gmp/mpz/pprime_p.c
new file mode 100644
index 0000000000..82eb678238
--- /dev/null
+++ b/rts/gmp/mpz/pprime_p.c
@@ -0,0 +1,242 @@
+/* mpz_probab_prime_p --
+ An implementation of the probabilistic primality test found in Knuth's
+ Seminumerical Algorithms book. If the function mpz_probab_prime_p()
+ returns 0 then n is not prime. If it returns 1, then n is 'probably'
+ prime. If it returns 2, n is surely prime. The probability of a false
+ positive is (1/4)**reps, where reps is the number of internal passes of the
+ probabilistic algorithm. Knuth indicates that 25 passes are reasonable.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc. Miller-Rabin code contributed by John Amanatides.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int isprime _PROTO ((unsigned long int t));
+static int mpz_millerrabin _PROTO ((mpz_srcptr n, int reps));
+
+int
+#if __STDC__
+mpz_probab_prime_p (mpz_srcptr n, int reps)
+#else
+mpz_probab_prime_p (n, reps)
+ mpz_srcptr n;
+ int reps;
+#endif
+{
+ mp_limb_t r;
+
+ /* Handle small and negative n. */
+ if (mpz_cmp_ui (n, 1000000L) <= 0)
+ {
+ int is_prime;
+ if (mpz_sgn (n) < 0)
+ {
+ /* Negative number. Negate and call ourselves. */
+ mpz_t n2;
+ mpz_init (n2);
+ mpz_neg (n2, n);
+ is_prime = mpz_probab_prime_p (n2, reps);
+ mpz_clear (n2);
+ return is_prime;
+ }
+ is_prime = isprime (mpz_get_ui (n));
+ return is_prime ? 2 : 0;
+ }
+
+ /* If n is now even, it is not a prime. */
+ if ((mpz_get_ui (n) & 1) == 0)
+ return 0;
+
+ /* Check if n has small factors. */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6))
+ r = mpn_preinv_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP, (mp_limb_t) PP_INVERTED);
+ else
+ r = mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP);
+ if (r % 3 == 0 || r % 5 == 0 || r % 7 == 0 || r % 11 == 0 || r % 13 == 0
+ || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
+#if BITS_PER_MP_LIMB == 64
+ || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
+ || r % 47 == 0 || r % 53 == 0
+#endif
+ )
+ {
+ return 0;
+ }
+
+ /* Do more dividing. We collect small primes, using umul_ppmm, until we
+ overflow a single limb. We divide our number by the small primes product,
+ and look for factors in the remainder. */
+ {
+ unsigned long int ln2;
+ unsigned long int q;
+ mp_limb_t p1, p0, p;
+ unsigned int primes[15];
+ int nprimes;
+
+ nprimes = 0;
+ p = 1;
+ ln2 = mpz_sizeinbase (n, 2) / 30; ln2 = ln2 * ln2;
+ for (q = BITS_PER_MP_LIMB == 64 ? 59 : 31; q < ln2; q += 2)
+ {
+ if (isprime (q))
+ {
+ umul_ppmm (p1, p0, p, q);
+ if (p1 != 0)
+ {
+ r = mpn_mod_1 (PTR(n), SIZ(n), p);
+ while (--nprimes >= 0)
+ if (r % primes[nprimes] == 0)
+ {
+ if (mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) primes[nprimes]) != 0)
+ abort ();
+ return 0;
+ }
+ p = q;
+ nprimes = 0;
+ }
+ else
+ {
+ p = p0;
+ }
+ primes[nprimes++] = q;
+ }
+ }
+ }
+
+ /* Perform a number of Miller-Rabin tests. */
+ return mpz_millerrabin (n, reps);
+}
+
+static int
+#if __STDC__
+isprime (unsigned long int t)
+#else
+isprime (t)
+ unsigned long int t;
+#endif
+{
+ unsigned long int q, r, d;
+
+ if (t < 3 || (t & 1) == 0)
+ return t == 2;
+
+ for (d = 3, r = 1; r != 0; d += 2)
+ {
+ q = t / d;
+ r = t - q * d;
+ if (q < d)
+ return 1;
+ }
+ return 0;
+}
+
+static int millerrabin _PROTO ((mpz_srcptr n, mpz_srcptr nm1,
+ mpz_ptr x, mpz_ptr y,
+ mpz_srcptr q, unsigned long int k));
+
+static int
+#if __STDC__
+mpz_millerrabin (mpz_srcptr n, int reps)
+#else
+mpz_millerrabin (n, reps)
+ mpz_srcptr n;
+ int reps;
+#endif
+{
+ int r;
+ mpz_t nm1, x, y, q;
+ unsigned long int k;
+ gmp_randstate_t rstate;
+ int is_prime;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (nm1, SIZ (n) + 1);
+ mpz_sub_ui (nm1, n, 1L);
+
+ MPZ_TMP_INIT (x, SIZ (n));
+ MPZ_TMP_INIT (y, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */
+
+ /* Perform a Fermat test. */
+ mpz_set_ui (x, 210L);
+ mpz_powm (y, x, nm1, n);
+ if (mpz_cmp_ui (y, 1L) != 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+
+ MPZ_TMP_INIT (q, SIZ (n));
+
+ /* Find q and k, where q is odd and n = 1 + 2**k * q. */
+ k = mpz_scan1 (nm1, 0L);
+ mpz_tdiv_q_2exp (q, nm1, k);
+
+ gmp_randinit (rstate, GMP_RAND_ALG_DEFAULT, 32L);
+
+ is_prime = 1;
+ for (r = 0; r < reps && is_prime; r++)
+ {
+ do
+ mpz_urandomb (x, rstate, mpz_sizeinbase (n, 2) - 1);
+ while (mpz_cmp_ui (x, 1L) <= 0);
+
+ is_prime = millerrabin (n, nm1, x, y, q, k);
+ }
+
+ gmp_randclear (rstate);
+
+ TMP_FREE (marker);
+ return is_prime;
+}
+
+static int
+#if __STDC__
+millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
+ mpz_srcptr q, unsigned long int k)
+#else
+millerrabin (n, nm1, x, y, q, k)
+ mpz_srcptr n;
+ mpz_srcptr nm1;
+ mpz_ptr x;
+ mpz_ptr y;
+ mpz_srcptr q;
+ unsigned long int k;
+#endif
+{
+ unsigned long int i;
+
+ mpz_powm (y, x, q, n);
+
+ if (mpz_cmp_ui (y, 1L) == 0 || mpz_cmp (y, nm1) == 0)
+ return 1;
+
+ for (i = 1; i < k; i++)
+ {
+ mpz_powm_ui (y, y, 2L, n);
+ if (mpz_cmp (y, nm1) == 0)
+ return 1;
+ if (mpz_cmp_ui (y, 1L) == 0)
+ return 0;
+ }
+ return 0;
+}
diff --git a/rts/gmp/mpz/random.c b/rts/gmp/mpz/random.c
new file mode 100644
index 0000000000..60d9113991
--- /dev/null
+++ b/rts/gmp/mpz/random.c
@@ -0,0 +1,56 @@
+/* mpz_random -- Generate a random mpz_t of specified size.
+ This function is non-portable and generates poor random numbers.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "urandom.h"
+
+void
+#if __STDC__
+mpz_random (mpz_ptr x, mp_size_t size)
+#else
+mpz_random (x, size)
+ mpz_ptr x;
+ mp_size_t size;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t ran;
+ mp_ptr xp;
+ mp_size_t abs_size;
+
+ abs_size = ABS (size);
+
+ if (x->_mp_alloc < abs_size)
+ _mpz_realloc (x, abs_size);
+
+ xp = x->_mp_d;
+
+ for (i = 0; i < abs_size; i++)
+ {
+ ran = urandom ();
+ xp[i] = ran;
+ }
+
+ MPN_NORMALIZE (xp, abs_size);
+ x->_mp_size = size < 0 ? -abs_size : abs_size;
+}
diff --git a/rts/gmp/mpz/random2.c b/rts/gmp/mpz/random2.c
new file mode 100644
index 0000000000..a90af115e9
--- /dev/null
+++ b/rts/gmp/mpz/random2.c
@@ -0,0 +1,48 @@
+/* mpz_random2 -- Generate a positive random mpz_t of specified size, with
+ long runs of consecutive ones and zeros in the binary representation.
+ Meant for testing of other MP routines.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_random2 (mpz_ptr x, mp_size_t size)
+#else
+mpz_random2 (x, size)
+ mpz_ptr x;
+ mp_size_t size;
+#endif
+{
+ mp_size_t abs_size;
+
+ abs_size = ABS (size);
+ if (abs_size != 0)
+ {
+ if (x->_mp_alloc < abs_size)
+ _mpz_realloc (x, abs_size);
+
+ mpn_random2 (x->_mp_d, abs_size);
+ }
+
+ x->_mp_size = size;
+}
diff --git a/rts/gmp/mpz/realloc.c b/rts/gmp/mpz/realloc.c
new file mode 100644
index 0000000000..0b9e447ec3
--- /dev/null
+++ b/rts/gmp/mpz/realloc.c
@@ -0,0 +1,52 @@
+/* _mpz_realloc -- make the mpz_t have NEW_SIZE digits allocated.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void *
+#if __STDC__
+_mpz_realloc (mpz_ptr m, mp_size_t new_size)
+#else
+_mpz_realloc (m, new_size)
+ mpz_ptr m;
+ mp_size_t new_size;
+#endif
+{
+ /* Never allocate zero space. */
+ if (new_size == 0)
+ new_size = 1;
+
+ m->_mp_d = (mp_ptr) (*_mp_reallocate_func) (m->_mp_d,
+ m->_mp_alloc * BYTES_PER_MP_LIMB,
+ new_size * BYTES_PER_MP_LIMB);
+ m->_mp_alloc = new_size;
+
+#if 0
+ /* This might break some code that reads the size field after
+ reallocation, in the case the reallocated destination and a
+ source argument are identical. */
+ if (ABS (m->_mp_size) > new_size)
+ m->_mp_size = 0;
+#endif
+
+ return (void *) m->_mp_d;
+}
diff --git a/rts/gmp/mpz/remove.c b/rts/gmp/mpz/remove.c
new file mode 100644
index 0000000000..bc6675f972
--- /dev/null
+++ b/rts/gmp/mpz/remove.c
@@ -0,0 +1,93 @@
+/* mpz_remove -- divide out a factor and return its multiplicity.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
+#else
+mpz_remove (dest, src, f)
+ mpz_ptr dest;
+ mpz_srcptr src;
+ mpz_srcptr f;
+#endif
+{
+ mpz_t fpow[40]; /* inexhaustible...until year 2020 or so */
+ mpz_t x, rem;
+ unsigned long int pwr;
+ int p;
+
+ if (mpz_cmp_ui (f, 1) <= 0 || mpz_sgn (src) == 0)
+ DIVIDE_BY_ZERO;
+ if (mpz_cmp_ui (f, 2) == 0)
+ {
+ unsigned long int s0;
+ s0 = mpz_scan1 (src, 0);
+ mpz_div_2exp (dest, src, s0);
+ return s0;
+ }
+
+ /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an
+ upper bound of the result we're seeking. We could also shift down the
+ operands so that they become odd, to make intermediate values smaller. */
+
+ mpz_init (rem);
+ mpz_init (x);
+
+ pwr = 0;
+ mpz_init (fpow[0]);
+ mpz_set (fpow[0], f);
+ mpz_set (dest, src);
+
+ /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k). */
+ for (p = 0;; p++)
+ {
+ mpz_tdiv_qr (x, rem, dest, fpow[p]);
+ if (SIZ (rem) != 0)
+ break;
+ mpz_init (fpow[p + 1]);
+ mpz_mul (fpow[p + 1], fpow[p], fpow[p]);
+ mpz_set (dest, x);
+ }
+
+ pwr = (1 << p) - 1;
+
+ mpz_clear (fpow[p]);
+
+ /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
+ zero remainder. */
+ while (--p >= 0)
+ {
+ mpz_tdiv_qr (x, rem, dest, fpow[p]);
+ if (SIZ (rem) == 0)
+ {
+ pwr += 1 << p;
+ mpz_set (dest, x);
+ }
+ mpz_clear (fpow[p]);
+ }
+
+ mpz_clear (x);
+ mpz_clear (rem);
+ return pwr;
+}
diff --git a/rts/gmp/mpz/root.c b/rts/gmp/mpz/root.c
new file mode 100644
index 0000000000..0920bf22d3
--- /dev/null
+++ b/rts/gmp/mpz/root.c
@@ -0,0 +1,183 @@
+/* mpz_root(root, u, nth) -- Set ROOT to floor(U^(1/nth)).
+ Return an indication if the result is exact.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Naive implementation of nth root extraction. It would probably be a
+ better idea to use a division-free Newton iteration. It is insane
+ to use full precision from iteration 1. The mpz_scan1 trick compensates
+ to some extent. It would be natural to avoid representing the low zero
+ bits mpz_scan1 is counting, and at the same time call mpn directly. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+int
+#if __STDC__
+mpz_root (mpz_ptr r, mpz_srcptr c, unsigned long int nth)
+#else
+mpz_root (r, c, nth)
+ mpz_ptr r;
+ mpz_srcptr c;
+ unsigned long int nth;
+#endif
+{
+ mpz_t x, t0, t1, t2;
+ __mpz_struct ccs, *cc = &ccs;
+ unsigned long int nbits;
+ int bit;
+ int exact;
+ int i;
+ unsigned long int lowz;
+ unsigned long int rl;
+
+ /* even roots of negatives provoke an exception */
+ if (mpz_sgn (c) < 0 && (nth & 1) == 0)
+ SQRT_OF_NEGATIVE;
+
+ /* root extraction interpreted as c^(1/nth) means a zeroth root should
+ provoke a divide by zero, do this even if c==0 */
+ if (nth == 0)
+ DIVIDE_BY_ZERO;
+
+ if (mpz_sgn (c) == 0)
+ {
+ if (r != NULL)
+ mpz_set_ui (r, 0);
+ return 1; /* exact result */
+ }
+
+ PTR(cc) = PTR(c);
+ SIZ(cc) = ABSIZ(c);
+
+ nbits = (mpz_sizeinbase (cc, 2) - 1) / nth;
+ if (nbits == 0)
+ {
+ if (r != NULL)
+ mpz_set_ui (r, 1);
+ if (mpz_sgn (c) < 0)
+ {
+ if (r != NULL)
+ SIZ(r) = -SIZ(r);
+ return mpz_cmp_si (c, -1L) == 0;
+ }
+ return mpz_cmp_ui (c, 1L) == 0;
+ }
+
+ mpz_init (x);
+ mpz_init (t0);
+ mpz_init (t1);
+ mpz_init (t2);
+
+ /* Create a one-bit approximation. */
+ mpz_set_ui (x, 0);
+ mpz_setbit (x, nbits);
+
+ /* Make the approximation better, one bit at a time. This odd-looking
+ termination criteria makes large nth get better initial approximation,
+ which avoids slow convergence for such values. */
+ bit = nbits - 1;
+ for (i = 1; (nth >> i) != 0; i++)
+ {
+ mpz_setbit (x, bit);
+ mpz_tdiv_q_2exp (t0, x, bit);
+ mpz_pow_ui (t1, t0, nth);
+ mpz_mul_2exp (t1, t1, bit * nth);
+ if (mpz_cmp (cc, t1) < 0)
+ mpz_clrbit (x, bit);
+
+ bit--; /* check/set next bit */
+ if (bit < 0)
+ {
+ /* We're done. */
+ mpz_pow_ui (t1, x, nth);
+ goto done;
+ }
+ }
+ mpz_setbit (x, bit);
+ mpz_set_ui (t2, 0); mpz_setbit (t2, bit); mpz_add (x, x, t2);
+
+#if DEBUG
+ /* Check that the starting approximation is >= than the root. */
+ mpz_pow_ui (t1, x, nth);
+ if (mpz_cmp (cc, t1) >= 0)
+ abort ();
+#endif
+
+ mpz_add_ui (x, x, 1);
+
+ /* Main loop */
+ do
+ {
+ lowz = mpz_scan1 (x, 0);
+ mpz_tdiv_q_2exp (t0, x, lowz);
+ mpz_pow_ui (t1, t0, nth - 1);
+ mpz_mul_2exp (t1, t1, lowz * (nth - 1));
+ mpz_tdiv_q (t2, cc, t1);
+ mpz_sub (t2, x, t2);
+ rl = mpz_tdiv_q_ui (t2, t2, nth);
+ mpz_sub (x, x, t2);
+ }
+ while (mpz_sgn (t2) != 0);
+
+ /* If we got a non-zero remainder in the last division, we know our root
+ is too large. */
+ mpz_sub_ui (x, x, (mp_limb_t) (rl != 0));
+
+ /* Adjustment loop. If we spend more care on rounding in the loop above,
+ we could probably get rid of this, or greatly simplify it. */
+ {
+ int bad = 0;
+ lowz = mpz_scan1 (x, 0);
+ mpz_tdiv_q_2exp (t0, x, lowz);
+ mpz_pow_ui (t1, t0, nth);
+ mpz_mul_2exp (t1, t1, lowz * nth);
+ while (mpz_cmp (cc, t1) < 0)
+ {
+ bad++;
+ if (bad > 2)
+ abort (); /* abort if our root is far off */
+ mpz_sub_ui (x, x, 1);
+ lowz = mpz_scan1 (x, 0);
+ mpz_tdiv_q_2exp (t0, x, lowz);
+ mpz_pow_ui (t1, t0, nth);
+ mpz_mul_2exp (t1, t1, lowz * nth);
+ }
+ }
+
+ done:
+ exact = mpz_cmp (t1, cc) == 0;
+
+ if (r != NULL)
+ {
+ mpz_set (r, x);
+ if (mpz_sgn (c) < 0)
+ SIZ(r) = -SIZ(r);
+ }
+
+ mpz_clear (t2);
+ mpz_clear (t1);
+ mpz_clear (t0);
+ mpz_clear (x);
+
+ return exact;
+}
diff --git a/rts/gmp/mpz/rrandomb.c b/rts/gmp/mpz/rrandomb.c
new file mode 100644
index 0000000000..7d78243674
--- /dev/null
+++ b/rts/gmp/mpz/rrandomb.c
@@ -0,0 +1,117 @@
+/* mpz_rrandomb -- Generate a positive random mpz_t of specified bit size, with
+ long runs of consecutive ones and zeros in the binary representation.
+ Meant for testing of other MP routines.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static void gmp_rrandomb _PROTO ((mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits));
+
+void
+#if __STDC__
+mpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+mpz_rrandomb (x, rstate, nbits)
+ mpz_ptr x;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ mp_size_t nl = 0;
+
+ if (nbits != 0)
+ {
+ mp_ptr xp;
+ nl = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ if (x->_mp_alloc < nl)
+ _mpz_realloc (x, nl);
+
+ xp = PTR(x);
+ gmp_rrandomb (xp, rstate, nbits);
+ MPN_NORMALIZE (xp, nl);
+ }
+
+ SIZ(x) = nl;
+}
+
+#define BITS_PER_CHUNK 4
+
+static void
+#if __STDC__
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+gmp_rrandomb (rp, rstate, nbits)
+ mp_ptr rp;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ int nb;
+ int bit_pos;
+ mp_size_t limb_pos;
+ mp_limb_t ran, ranm;
+ mp_limb_t acc;
+ mp_size_t n;
+
+ bit_pos = nbits % BITS_PER_MP_LIMB;
+ limb_pos = nbits / BITS_PER_MP_LIMB;
+ if (bit_pos == 0)
+ {
+ bit_pos = BITS_PER_MP_LIMB;
+ limb_pos--;
+ }
+
+ acc = 0;
+ while (limb_pos >= 0)
+ {
+ _gmp_rand (&ranm, rstate, BITS_PER_CHUNK + 1);
+ ran = ranm;
+ nb = (ran >> 1) + 1;
+ if ((ran & 1) != 0)
+ {
+ /* Generate a string of ones. */
+ if (nb > bit_pos)
+ {
+ rp[limb_pos--] = acc | ((((mp_limb_t) 1) << bit_pos) - 1);
+ bit_pos += BITS_PER_MP_LIMB;
+ bit_pos -= nb;
+ acc = (~(mp_limb_t) 0) << bit_pos;
+ }
+ else
+ {
+ bit_pos -= nb;
+ acc |= ((((mp_limb_t) 1) << nb) - 1) << bit_pos;
+ }
+ }
+ else
+ {
+ /* Generate a string of zeroes. */
+ if (nb > bit_pos)
+ {
+ rp[limb_pos--] = acc;
+ acc = 0;
+ bit_pos += BITS_PER_MP_LIMB;
+ }
+ bit_pos -= nb;
+ }
+ }
+}
diff --git a/rts/gmp/mpz/scan0.c b/rts/gmp/mpz/scan0.c
new file mode 100644
index 0000000000..6c59cf8939
--- /dev/null
+++ b/rts/gmp/mpz/scan0.c
@@ -0,0 +1,35 @@
+/* mpz_scan0(op, startbit) -- Scan for the next set bit, starting at startbit.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_scan0 (mpz_srcptr u, unsigned long int starting_bit)
+#else
+mpz_scan0 (u, starting_bit)
+ mpz_srcptr u;
+ unsigned long int starting_bit;
+#endif
+{
+ return mpn_scan0 (u->_mp_d, starting_bit);
+}
diff --git a/rts/gmp/mpz/scan1.c b/rts/gmp/mpz/scan1.c
new file mode 100644
index 0000000000..3b84e3420c
--- /dev/null
+++ b/rts/gmp/mpz/scan1.c
@@ -0,0 +1,35 @@
+/* mpz_scan1(op, startbit) -- Scan for the next set bit, starting at startbit.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_scan1 (mpz_srcptr u, unsigned long int starting_bit)
+#else
+mpz_scan1 (u, starting_bit)
+ mpz_srcptr u;
+ unsigned long int starting_bit;
+#endif
+{
+ return mpn_scan1 (u->_mp_d, starting_bit);
+}
diff --git a/rts/gmp/mpz/set.c b/rts/gmp/mpz/set.c
new file mode 100644
index 0000000000..06b2eef511
--- /dev/null
+++ b/rts/gmp/mpz/set.c
@@ -0,0 +1,48 @@
+/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_set (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+
+ usize = u->_mp_size;
+ size = ABS (usize);
+
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ w->_mp_size = usize;
+}
diff --git a/rts/gmp/mpz/set_d.c b/rts/gmp/mpz/set_d.c
new file mode 100644
index 0000000000..e90ed9bc2f
--- /dev/null
+++ b/rts/gmp/mpz/set_d.c
@@ -0,0 +1,96 @@
+/* mpz_set_d(integer, val) -- Assign INTEGER with a double value VAL.
+
+Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_d (mpz_ptr r, double d)
+#else
+mpz_set_d (r, d)
+ mpz_ptr r;
+ double d;
+#endif
+{
+ int negative;
+ mp_limb_t tp[3];
+ mp_ptr rp;
+ mp_size_t rn;
+
+ negative = d < 0;
+ d = ABS (d);
+
+ /* Handle small arguments quickly. */
+ if (d < MP_BASE_AS_DOUBLE)
+ {
+ mp_limb_t tmp;
+ tmp = d;
+ PTR(r)[0] = tmp;
+ SIZ(r) = negative ? -(tmp != 0) : (tmp != 0);
+ return;
+ }
+
+ rn = __gmp_extract_double (tp, d);
+
+ if (ALLOC(r) < rn)
+ _mpz_realloc (r, rn);
+
+ rp = PTR (r);
+
+#if BITS_PER_MP_LIMB == 32
+ switch (rn)
+ {
+ default:
+ MPN_ZERO (rp, rn - 3);
+ rp += rn - 3;
+ /* fall through */
+ case 3:
+ rp[2] = tp[2];
+ rp[1] = tp[1];
+ rp[0] = tp[0];
+ break;
+ case 2:
+ rp[1] = tp[2];
+ rp[0] = tp[1];
+ break;
+ case 1:
+ /* handled in "small aguments" case above */
+ abort ();
+ }
+#else
+ switch (rn)
+ {
+ default:
+ MPN_ZERO (rp, rn - 2);
+ rp += rn - 2;
+ /* fall through */
+ case 2:
+ rp[1] = tp[1], rp[0] = tp[0];
+ break;
+ case 1:
+ /* handled in "small aguments" case above */
+ abort ();
+ }
+#endif
+
+ SIZ(r) = negative ? -rn : rn;
+}
diff --git a/rts/gmp/mpz/set_f.c b/rts/gmp/mpz/set_f.c
new file mode 100644
index 0000000000..2273953dfd
--- /dev/null
+++ b/rts/gmp/mpz/set_f.c
@@ -0,0 +1,64 @@
+/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_f (mpz_ptr w, mpf_srcptr u)
+#else
+mpz_set_f (w, u)
+ mpz_ptr w;
+ mpf_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+ mp_exp_t exp;
+
+ usize = SIZ (u);
+ size = ABS (usize);
+ exp = EXP (u);
+
+ if (w->_mp_alloc < exp)
+ _mpz_realloc (w, exp);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ if (exp <= 0)
+ {
+ SIZ (w) = 0;
+ return;
+ }
+ if (exp < size)
+ {
+ MPN_COPY (wp, up + size - exp, exp);
+ }
+ else
+ {
+ MPN_ZERO (wp, exp - size);
+ MPN_COPY (wp + exp - size, up, size);
+ }
+
+ w->_mp_size = usize >= 0 ? exp : -exp;
+}
diff --git a/rts/gmp/mpz/set_q.c b/rts/gmp/mpz/set_q.c
new file mode 100644
index 0000000000..72d3222a80
--- /dev/null
+++ b/rts/gmp/mpz/set_q.c
@@ -0,0 +1,36 @@
+/* mpz_set_q (dest_integer, src_rational) -- Assign DEST_INTEGER from
+ SRC_rational.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_q (mpz_ptr w, mpq_srcptr u)
+#else
+mpz_set_q (w, u)
+ mpz_ptr w;
+ mpq_srcptr u;
+#endif
+{
+ mpz_tdiv_q (w, mpq_numref (u), mpq_denref (u));
+}
diff --git a/rts/gmp/mpz/set_si.c b/rts/gmp/mpz/set_si.c
new file mode 100644
index 0000000000..9ba2fbaf30
--- /dev/null
+++ b/rts/gmp/mpz/set_si.c
@@ -0,0 +1,48 @@
+/* mpz_set_si(integer, val) -- Assign INTEGER with a small value VAL.
+
+Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_si (mpz_ptr dest, signed long int val)
+#else
+mpz_set_si (dest, val)
+ mpz_ptr dest;
+ signed long int val;
+#endif
+{
+ /* We don't check if the allocation is enough, since the rest of the
+ package ensures it's at least 1, which is what we need here. */
+ if (val > 0)
+ {
+ dest->_mp_d[0] = val;
+ dest->_mp_size = 1;
+ }
+ else if (val < 0)
+ {
+ dest->_mp_d[0] = (unsigned long) -val;
+ dest->_mp_size = -1;
+ }
+ else
+ dest->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/set_str.c b/rts/gmp/mpz/set_str.c
new file mode 100644
index 0000000000..3ab79c0e89
--- /dev/null
+++ b/rts/gmp/mpz/set_str.c
@@ -0,0 +1,157 @@
+/* mpz_set_str(mp_dest, string, base) -- Convert the \0-terminated
+ string STRING in base BASE to multiple precision integer in
+ MP_DEST. Allow white space in the string. If BASE == 0 determine
+ the base in the C standard way, i.e. 0xhh...h means base 16,
+ 0oo...o means base 8, otherwise assume base 10.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <string.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int
+#if __STDC__
+digit_value_in_base (int c, int base)
+#else
+digit_value_in_base (c, base)
+ int c;
+ int base;
+#endif
+{
+ int digit;
+
+ if (isdigit (c))
+ digit = c - '0';
+ else if (islower (c))
+ digit = c - 'a' + 10;
+ else if (isupper (c))
+ digit = c - 'A' + 10;
+ else
+ return -1;
+
+ if (digit < base)
+ return digit;
+ return -1;
+}
+
+int
+#if __STDC__
+mpz_set_str (mpz_ptr x, const char *str, int base)
+#else
+mpz_set_str (x, str, base)
+ mpz_ptr x;
+ const char *str;
+ int base;
+#endif
+{
+ size_t str_size;
+ char *s, *begs;
+ size_t i;
+ mp_size_t xsize;
+ int c;
+ int negative;
+ TMP_DECL (marker);
+
+ /* Skip whitespace. */
+ do
+ c = *str++;
+ while (isspace (c));
+
+ negative = 0;
+ if (c == '-')
+ {
+ negative = 1;
+ c = *str++;
+ }
+
+ if (digit_value_in_base (c, base == 0 ? 10 : base) < 0)
+ return -1; /* error if no digits */
+
+ /* If BASE is 0, try to find out the base by looking at the initial
+ characters. */
+ if (base == 0)
+ {
+ base = 10;
+ if (c == '0')
+ {
+ base = 8;
+ c = *str++;
+ if (c == 'x' || c == 'X')
+ {
+ base = 16;
+ c = *str++;
+ }
+ else if (c == 'b' || c == 'B')
+ {
+ base = 2;
+ c = *str++;
+ }
+ }
+ }
+
+ /* Skip leading zeros. */
+ while (c == '0')
+ c = *str++;
+ /* Make sure the string does not become empty, mpn_set_str would fail. */
+ if (c == 0)
+ {
+ x->_mp_size = 0;
+ return 0;
+ }
+
+ TMP_MARK (marker);
+ str_size = strlen (str - 1);
+ s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+ /* Remove spaces from the string and convert the result from ASCII to a
+ byte array. */
+ for (i = 0; i < str_size; i++)
+ {
+ if (!isspace (c))
+ {
+ int dig = digit_value_in_base (c, base);
+ if (dig < 0)
+ {
+ TMP_FREE (marker);
+ return -1;
+ }
+ *s++ = dig;
+ }
+ c = *str++;
+ }
+
+ str_size = s - begs;
+
+ xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly))
+ / BITS_PER_MP_LIMB + 2);
+ if (x->_mp_alloc < xsize)
+ _mpz_realloc (x, xsize);
+
+ /* Convert the byte array in base BASE to our bignum format. */
+ xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base);
+ x->_mp_size = negative ? -xsize : xsize;
+
+ TMP_FREE (marker);
+ return 0;
+}
diff --git a/rts/gmp/mpz/set_ui.c b/rts/gmp/mpz/set_ui.c
new file mode 100644
index 0000000000..d6097c170a
--- /dev/null
+++ b/rts/gmp/mpz/set_ui.c
@@ -0,0 +1,43 @@
+/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_ui (mpz_ptr dest, unsigned long int val)
+#else
+mpz_set_ui (dest, val)
+ mpz_ptr dest;
+ unsigned long int val;
+#endif
+{
+ /* We don't check if the allocation is enough, since the rest of the
+ package ensures it's at least 1, which is what we need here. */
+ if (val > 0)
+ {
+ dest->_mp_d[0] = val;
+ dest->_mp_size = 1;
+ }
+ else
+ dest->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/setbit.c b/rts/gmp/mpz/setbit.c
new file mode 100644
index 0000000000..d4249a434e
--- /dev/null
+++ b/rts/gmp/mpz/setbit.c
@@ -0,0 +1,119 @@
+/* mpz_setbit -- set a specified bit.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1997, 1999 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_setbit (mpz_ptr d, unsigned long int bit_index)
+#else
+mpz_setbit (d, bit_index)
+ mpz_ptr d;
+ unsigned long int bit_index;
+#endif
+{
+ mp_size_t dsize = d->_mp_size;
+ mp_ptr dp = d->_mp_d;
+ mp_size_t limb_index;
+
+ limb_index = bit_index / BITS_PER_MP_LIMB;
+ if (dsize >= 0)
+ {
+ if (limb_index < dsize)
+ {
+ dp[limb_index] |= (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ d->_mp_size = dsize;
+ }
+ else
+ {
+ /* Ugh. The bit should be set outside of the end of the
+ number. We have to increase the size of the number. */
+ if (d->_mp_alloc < limb_index + 1)
+ {
+ _mpz_realloc (d, limb_index + 1);
+ dp = d->_mp_d;
+ }
+ MPN_ZERO (dp + dsize, limb_index - dsize);
+ dp[limb_index] = (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ d->_mp_size = limb_index + 1;
+ }
+ }
+ else
+ {
+ mp_size_t zero_bound;
+
+ /* Simulate two's complement arithmetic, i.e. simulate
+ 1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+ 2. Set the bit.
+ 3. Set OP = ~OP + 1. */
+
+ dsize = -dsize;
+
+ /* No upper bound on this loop, we're sure there's a non-zero limb
+ sooner ot later. */
+ for (zero_bound = 0; ; zero_bound++)
+ if (dp[zero_bound] != 0)
+ break;
+
+ if (limb_index > zero_bound)
+ {
+ if (limb_index < dsize)
+ dp[limb_index] &= ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB));
+ else
+ ;
+ }
+ else if (limb_index == zero_bound)
+ {
+ dp[limb_index] = ((dp[limb_index] - 1)
+ & ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB))) + 1;
+ if (dp[limb_index] == 0)
+ {
+ mp_size_t i;
+ for (i = limb_index + 1; i < dsize; i++)
+ {
+ dp[i] += 1;
+ if (dp[i] != 0)
+ goto fin;
+ }
+ /* We got carry all way out beyond the end of D. Increase
+ its size (and allocation if necessary). */
+ dsize++;
+ if (d->_mp_alloc < dsize)
+ {
+ _mpz_realloc (d, dsize);
+ dp = d->_mp_d;
+ }
+ dp[i] = 1;
+ d->_mp_size = -dsize;
+ fin:;
+ }
+ }
+ else
+ {
+ mpn_decr_u (dp + limb_index,
+ (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB));
+ dsize -= dp[dsize - 1] == 0;
+ d->_mp_size = -dsize;
+ }
+ }
+}
diff --git a/rts/gmp/mpz/size.c b/rts/gmp/mpz/size.c
new file mode 100644
index 0000000000..6574756783
--- /dev/null
+++ b/rts/gmp/mpz/size.c
@@ -0,0 +1,35 @@
+/* mpz_size(x) -- return the number of lims currently used by the
+ value of integer X.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+#if __STDC__
+mpz_size (mpz_srcptr x)
+#else
+mpz_size (x)
+ mpz_srcptr x;
+#endif
+{
+ return ABS (x->_mp_size);
+}
diff --git a/rts/gmp/mpz/sizeinbase.c b/rts/gmp/mpz/sizeinbase.c
new file mode 100644
index 0000000000..734f9c4532
--- /dev/null
+++ b/rts/gmp/mpz/sizeinbase.c
@@ -0,0 +1,60 @@
+/* mpz_sizeinbase(x, base) -- return an approximation to the number of
+ character the integer X would have printed in base BASE. The
+ approximation is never too small.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+size_t
+#if __STDC__
+mpz_sizeinbase (mpz_srcptr x, int base)
+#else
+mpz_sizeinbase (x, base)
+ mpz_srcptr x;
+ int base;
+#endif
+{
+ mp_size_t size = ABS (x->_mp_size);
+ int lb_base, cnt;
+ size_t totbits;
+
+ /* Special case for X == 0. */
+ if (size == 0)
+ return 1;
+
+ /* Calculate the total number of significant bits of X. */
+ count_leading_zeros (cnt, x->_mp_d[size - 1]);
+ totbits = size * BITS_PER_MP_LIMB - cnt;
+
+ if ((base & (base - 1)) == 0)
+ {
+ /* Special case for powers of 2, giving exact result. */
+
+ count_leading_zeros (lb_base, base);
+ lb_base = BITS_PER_MP_LIMB - lb_base - 1;
+
+ return (totbits + lb_base - 1) / lb_base;
+ }
+ else
+ return (size_t) (totbits * __mp_bases[base].chars_per_bit_exactly) + 1;
+}
diff --git a/rts/gmp/mpz/sqrt.c b/rts/gmp/mpz/sqrt.c
new file mode 100644
index 0000000000..fe82fe407a
--- /dev/null
+++ b/rts/gmp/mpz/sqrt.c
@@ -0,0 +1,86 @@
+/* mpz_sqrt(root, u) -- Set ROOT to floor(sqrt(U)).
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_sqrt (mpz_ptr root, mpz_srcptr op)
+#else
+mpz_sqrt (root, op)
+ mpz_ptr root;
+ mpz_srcptr op;
+#endif
+{
+ mp_size_t op_size, root_size;
+ mp_ptr root_ptr, op_ptr;
+ mp_ptr free_me = NULL;
+ mp_size_t free_me_size;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op_size = op->_mp_size;
+ if (op_size < 0)
+ SQRT_OF_NEGATIVE;
+
+ /* The size of the root is accurate after this simple calculation. */
+ root_size = (op_size + 1) / 2;
+
+ root_ptr = root->_mp_d;
+ op_ptr = op->_mp_d;
+
+ if (root->_mp_alloc < root_size)
+ {
+ if (root_ptr == op_ptr)
+ {
+ free_me = root_ptr;
+ free_me_size = root->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ root->_mp_alloc = root_size;
+ root_ptr = (mp_ptr) (*_mp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
+ root->_mp_d = root_ptr;
+ }
+ else
+ {
+ /* Make OP not overlap with ROOT. */
+ if (root_ptr == op_ptr)
+ {
+ /* ROOT and OP are identical. Allocate temporary space for OP. */
+ op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
+ /* Copy to the temporary space. Hack: Avoid temporary variable
+ by using ROOT_PTR. */
+ MPN_COPY (op_ptr, root_ptr, op_size);
+ }
+ }
+
+ mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+
+ root->_mp_size = root_size;
+
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/sqrtrem.c b/rts/gmp/mpz/sqrtrem.c
new file mode 100644
index 0000000000..99a6453122
--- /dev/null
+++ b/rts/gmp/mpz/sqrtrem.c
@@ -0,0 +1,111 @@
+/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM
+ to the remainder, i.e. X - ROOT**2.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
+#else
+mpz_sqrtrem (root, rem, op)
+ mpz_ptr root;
+ mpz_ptr rem;
+ mpz_srcptr op;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+msqrt (mpz_srcptr op, mpz_ptr root, mpz_ptr rem)
+#else
+msqrt (op, root, rem)
+ mpz_srcptr op;
+ mpz_ptr root;
+ mpz_ptr rem;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_size_t op_size, root_size, rem_size;
+ mp_ptr root_ptr, op_ptr;
+ mp_ptr free_me = NULL;
+ mp_size_t free_me_size;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op_size = op->_mp_size;
+ if (op_size < 0)
+ SQRT_OF_NEGATIVE;
+
+ if (rem->_mp_alloc < op_size)
+ _mpz_realloc (rem, op_size);
+
+ /* The size of the root is accurate after this simple calculation. */
+ root_size = (op_size + 1) / 2;
+
+ root_ptr = root->_mp_d;
+ op_ptr = op->_mp_d;
+
+ if (root->_mp_alloc < root_size)
+ {
+ if (root_ptr == op_ptr)
+ {
+ free_me = root_ptr;
+ free_me_size = root->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ root->_mp_alloc = root_size;
+ root_ptr = (mp_ptr) (*_mp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
+ root->_mp_d = root_ptr;
+ }
+ else
+ {
+ /* Make OP not overlap with ROOT. */
+ if (root_ptr == op_ptr)
+ {
+ /* ROOT and OP are identical. Allocate temporary space for OP. */
+ op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
+ /* Copy to the temporary space. Hack: Avoid temporary variable
+ by using ROOT_PTR. */
+ MPN_COPY (op_ptr, root_ptr, op_size);
+ }
+ }
+
+ rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);
+
+ root->_mp_size = root_size;
+
+ /* Write remainder size last, to enable us to define this function to
+ give only the square root remainder, if the user calls if with
+ ROOT == REM. */
+ rem->_mp_size = rem_size;
+
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/sub.c b/rts/gmp/mpz/sub.c
new file mode 100644
index 0000000000..f3ae7c23a0
--- /dev/null
+++ b/rts/gmp/mpz/sub.c
@@ -0,0 +1,123 @@
+/* mpz_sub -- Subtract two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_sub (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_sub (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+msub (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#else
+msub (u, v, w)
+ mpz_srcptr u;
+ mpz_srcptr v;
+ mpz_ptr w;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_srcptr up, vp;
+ mp_ptr wp;
+ mp_size_t usize, vsize, wsize;
+ mp_size_t abs_usize;
+ mp_size_t abs_vsize;
+
+ usize = u->_mp_size;
+ vsize = -v->_mp_size; /* The "-" makes the difference from mpz_add */
+ abs_usize = ABS (usize);
+ abs_vsize = ABS (vsize);
+
+ if (abs_usize < abs_vsize)
+ {
+ /* Swap U and V. */
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (usize, vsize);
+ MP_SIZE_T_SWAP (abs_usize, abs_vsize);
+ }
+
+ /* True: ABS_USIZE >= ABS_VSIZE. */
+
+ /* If not space for w (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (u or v may be the same as w). */
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ wp = w->_mp_d;
+
+ if ((usize ^ vsize) < 0)
+ {
+ /* U and V have different sign. Need to compare them to determine
+ which operand to subtract from which. */
+
+ /* This test is right since ABS_USIZE >= ABS_VSIZE. */
+ if (abs_usize != abs_vsize)
+ {
+ mpn_sub (wp, up, abs_usize, vp, abs_vsize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ else if (mpn_cmp (up, vp, abs_usize) < 0)
+ {
+ mpn_sub_n (wp, vp, up, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize >= 0)
+ wsize = -wsize;
+ }
+ else
+ {
+ mpn_sub_n (wp, up, vp, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ }
+ else
+ {
+ /* U and V have same sign. Add them. */
+ mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize);
+ wp[abs_usize] = cy_limb;
+ wsize = abs_usize + cy_limb;
+ if (usize < 0)
+ wsize = -wsize;
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/sub_ui.c b/rts/gmp/mpz/sub_ui.c
new file mode 100644
index 0000000000..327add8503
--- /dev/null
+++ b/rts/gmp/mpz/sub_ui.c
@@ -0,0 +1,84 @@
+/* mpz_sub_ui -- Subtract an unsigned one-word integer from an MP_INT.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_sub_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_sub_ui (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int v;
+#endif
+{
+ mp_srcptr up;
+ mp_ptr wp;
+ mp_size_t usize, wsize;
+ mp_size_t abs_usize;
+
+ usize = u->_mp_size;
+ abs_usize = ABS (usize);
+
+ /* If not space for W (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->_mp_d;
+ wp = w->_mp_d;
+
+ if (abs_usize == 0)
+ {
+ wp[0] = v;
+ w->_mp_size = -(v != 0);
+ return;
+ }
+
+ if (usize < 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) v);
+ wp[abs_usize] = cy;
+ wsize = -(abs_usize + cy);
+ }
+ else
+ {
+ /* The signs are different. Need exact comparison to determine
+ which operand to subtract from which. */
+ if (abs_usize == 1 && up[0] < v)
+ {
+ wp[0] = v - up[0];
+ wsize = -1;
+ }
+ else
+ {
+ mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) v);
+ /* Size can decrease with at most one limb. */
+ wsize = abs_usize - (wp[abs_usize - 1] == 0);
+ }
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/swap.c b/rts/gmp/mpz/swap.c
new file mode 100644
index 0000000000..0070d6ff24
--- /dev/null
+++ b/rts/gmp/mpz/swap.c
@@ -0,0 +1,52 @@
+/* mpz_swap (dest_integer, src_integer) -- Swap U and V.
+
+Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_swap (mpz_ptr u, mpz_ptr v)
+#else
+mpz_swap (u, v)
+ mpz_ptr u;
+ mpz_ptr v;
+#endif
+{
+ mp_ptr up, vp;
+ mp_size_t usize, vsize;
+ mp_size_t ualloc, valloc;
+
+ ualloc = u->_mp_alloc;
+ valloc = v->_mp_alloc;
+ v->_mp_alloc = ualloc;
+ u->_mp_alloc = valloc;
+
+ usize = u->_mp_size;
+ vsize = v->_mp_size;
+ v->_mp_size = usize;
+ u->_mp_size = vsize;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ v->_mp_d = up;
+ u->_mp_d = vp;
+}
diff --git a/rts/gmp/mpz/tdiv_q.c b/rts/gmp/mpz/tdiv_q.c
new file mode 100644
index 0000000000..21db4ab385
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_q.c
@@ -0,0 +1,91 @@
+/* mpz_tdiv_q -- divide two integers and produce a quotient.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_tdiv_q (quot, num, den)
+ mpz_ptr quot;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+{
+ mp_size_t ql;
+ mp_size_t ns, ds, nl, dl;
+ mp_ptr np, dp, qp, rp;
+ TMP_DECL (marker);
+
+ ns = SIZ (num);
+ ds = SIZ (den);
+ nl = ABS (ns);
+ dl = ABS (ds);
+ ql = nl - dl + 1;
+
+ if (dl == 0)
+ DIVIDE_BY_ZERO;
+
+ if (ql <= 0)
+ {
+ SIZ (quot) = 0;
+ return;
+ }
+
+ MPZ_REALLOC (quot, ql);
+
+ TMP_MARK (marker);
+ qp = PTR (quot);
+ rp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ np = PTR (num);
+ dp = PTR (den);
+
+ /* FIXME: We should think about how to handle the temporary allocation.
+ Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+ allocate temp space. */
+
+ /* Copy denominator to temporary space if it overlaps with the quotient. */
+ if (dp == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, dl);
+ dp = tp;
+ }
+ /* Copy numerator to temporary space if it overlaps with the quotient. */
+ if (np == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, np, nl);
+ np = tp;
+ }
+
+ mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);
+
+ ql -= qp[ql - 1] == 0;
+
+ SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/tdiv_q_2exp.c b/rts/gmp/mpz/tdiv_q_2exp.c
new file mode 100644
index 0000000000..03d1e01f89
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_q_2exp.c
@@ -0,0 +1,68 @@
+/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient
+ towards -infinity.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
+#else
+mpz_tdiv_q_2exp (w, u, cnt)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t usize, wsize;
+ mp_size_t limb_cnt;
+
+ usize = u->_mp_size;
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+ wsize = ABS (usize) - limb_cnt;
+ if (wsize <= 0)
+ w->_mp_size = 0;
+ else
+ {
+ mp_ptr wp;
+ mp_srcptr up;
+
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ cnt %= BITS_PER_MP_LIMB;
+ if (cnt != 0)
+ {
+ mpn_rshift (wp, up + limb_cnt, wsize, cnt);
+ wsize -= wp[wsize - 1] == 0;
+ }
+ else
+ {
+ MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+ }
+
+ w->_mp_size = usize >= 0 ? wsize : -wsize;
+ }
+}
diff --git a/rts/gmp/mpz/tdiv_q_ui.c b/rts/gmp/mpz/tdiv_q_ui.c
new file mode 100644
index 0000000000..a2e3462b76
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_q_ui.c
@@ -0,0 +1,64 @@
+/* mpz_tdiv_q_ui(quot, dividend, divisor_limb)
+ -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_q_ui (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb
+ = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ /* The quotient is SIZE limbs, but the most significant might be zero. */
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tdiv_qr.c b/rts/gmp/mpz/tdiv_qr.c
new file mode 100644
index 0000000000..d66f57d9e5
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_qr.c
@@ -0,0 +1,130 @@
+/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,
+ and REM to DIVIDEND mod DIVISOR.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+#ifndef BERKELEY_MP
+
+void
+#if __STDC__
+mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_tdiv_qr (quot, rem, num, den)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+
+#else /* BERKELEY_MP */
+
+void
+#if __STDC__
+mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
+#else
+mdiv (num, den, quot, rem)
+ mpz_srcptr num;
+ mpz_srcptr den;
+ mpz_ptr quot;
+ mpz_ptr rem;
+#endif
+
+#endif /* BERKELEY_MP */
+{
+ mp_size_t ql;
+ mp_size_t ns, ds, nl, dl;
+ mp_ptr np, dp, qp, rp;
+ TMP_DECL (marker);
+
+ ns = SIZ (num);
+ ds = SIZ (den);
+ nl = ABS (ns);
+ dl = ABS (ds);
+ ql = nl - dl + 1;
+
+ if (dl == 0)
+ DIVIDE_BY_ZERO;
+
+ MPZ_REALLOC (rem, dl);
+
+ if (ql <= 0)
+ {
+ if (num != rem)
+ {
+ mp_ptr np, rp;
+ np = PTR (num);
+ rp = PTR (rem);
+ MPN_COPY (rp, np, nl);
+ SIZ (rem) = SIZ (num);
+ }
+ /* This needs to follow the assignment to rem, in case the
+ numerator and quotient are the same. */
+ SIZ (quot) = 0;
+ return;
+ }
+
+ MPZ_REALLOC (quot, ql);
+
+ TMP_MARK (marker);
+ qp = PTR (quot);
+ rp = PTR (rem);
+ np = PTR (num);
+ dp = PTR (den);
+
+ /* FIXME: We should think about how to handle the temporary allocation.
+ Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+ allocate temp space. */
+
+ /* Copy denominator to temporary space if it overlaps with the quotient
+ or remainder. */
+ if (dp == rp || dp == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, dl);
+ dp = tp;
+ }
+ /* Copy numerator to temporary space if it overlaps with the quotient or
+ remainder. */
+ if (np == rp || np == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, np, nl);
+ np = tp;
+ }
+
+ mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);
+
+ ql -= qp[ql - 1] == 0;
+ MPN_NORMALIZE (rp, dl);
+
+ SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;
+ SIZ (rem) = ns >= 0 ? dl : -dl;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/tdiv_qr_ui.c b/rts/gmp/mpz/tdiv_qr_ui.c
new file mode 100644
index 0000000000..10368cd340
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_qr_ui.c
@@ -0,0 +1,76 @@
+/* mpz_tdiv_qr_ui(quot,rem,dividend,short_divisor) --
+ Set QUOT to DIVIDEND / SHORT_DIVISOR
+ and REM to DIVIDEND mod SHORT_DIVISOR.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_qr_ui (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb == 0)
+ rem->_mp_size = 0;
+ else
+ {
+ /* Store the single-limb remainder. We don't check if there's space
+ for just one limb, since no function ever makes zero space. */
+ rem->_mp_size = dividend_size >= 0 ? 1 : -1;
+ rem->_mp_d[0] = remainder_limb;
+ }
+
+ /* The quotient is SIZE limbs, but the most significant might be zero. */
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tdiv_r.c b/rts/gmp/mpz/tdiv_r.c
new file mode 100644
index 0000000000..9eb87dfabf
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_r.c
@@ -0,0 +1,98 @@
+/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_tdiv_r (rem, num, den)
+ mpz_ptr rem;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+{
+ mp_size_t ql;
+ mp_size_t ns, ds, nl, dl;
+ mp_ptr np, dp, qp, rp;
+ TMP_DECL (marker);
+
+ ns = SIZ (num);
+ ds = SIZ (den);
+ nl = ABS (ns);
+ dl = ABS (ds);
+ ql = nl - dl + 1;
+
+ if (dl == 0)
+ DIVIDE_BY_ZERO;
+
+ MPZ_REALLOC (rem, dl);
+
+ if (ql <= 0)
+ {
+ if (num != rem)
+ {
+ mp_ptr np, rp;
+ np = PTR (num);
+ rp = PTR (rem);
+ MPN_COPY (rp, np, nl);
+ SIZ (rem) = SIZ (num);
+ }
+ return;
+ }
+
+ TMP_MARK (marker);
+ qp = (mp_ptr) TMP_ALLOC (ql * BYTES_PER_MP_LIMB);
+ rp = PTR (rem);
+ np = PTR (num);
+ dp = PTR (den);
+
+ /* FIXME: We should think about how to handle the temporary allocation.
+ Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+ allocate temp space. */
+
+ /* Copy denominator to temporary space if it overlaps with the remainder. */
+ if (dp == rp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, dl);
+ dp = tp;
+ }
+ /* Copy numerator to temporary space if it overlaps with the remainder. */
+ if (np == rp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, np, nl);
+ np = tp;
+ }
+
+ mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);
+
+ MPN_NORMALIZE (rp, dl);
+
+ SIZ (rem) = ns >= 0 ? dl : -dl;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/tdiv_r_2exp.c b/rts/gmp/mpz/tdiv_r_2exp.c
new file mode 100644
index 0000000000..91de170f5c
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_r_2exp.c
@@ -0,0 +1,79 @@
+/* mpz_tdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, unsigned long int cnt)
+#else
+mpz_tdiv_r_2exp (res, in, cnt)
+ mpz_ptr res;
+ mpz_srcptr in;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t in_size = ABS (in->_mp_size);
+ mp_size_t res_size;
+ mp_size_t limb_cnt = cnt / BITS_PER_MP_LIMB;
+ mp_srcptr in_ptr = in->_mp_d;
+
+ if (in_size > limb_cnt)
+ {
+ /* The input operand is (probably) greater than 2**CNT. */
+ mp_limb_t x;
+
+ x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % BITS_PER_MP_LIMB) - 1);
+ if (x != 0)
+ {
+ res_size = limb_cnt + 1;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ res->_mp_d[limb_cnt] = x;
+ }
+ else
+ {
+ res_size = limb_cnt;
+ MPN_NORMALIZE (in_ptr, res_size);
+
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+ }
+ else
+ {
+ /* The input operand is smaller than 2**CNT. We perform a no-op,
+ apart from that we might need to copy IN to RES. */
+ res_size = in_size;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+
+ if (res != in)
+ MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);
+ res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size;
+}
diff --git a/rts/gmp/mpz/tdiv_r_ui.c b/rts/gmp/mpz/tdiv_r_ui.c
new file mode 100644
index 0000000000..2ea411fda1
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_r_ui.c
@@ -0,0 +1,63 @@
+/* mpz_tdiv_r_ui(rem, dividend, divisor_limb)
+ -- Set REM to DIVDEND mod DIVISOR_LIMB.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_r_ui (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb == 0)
+ rem->_mp_size = 0;
+ else
+ {
+ /* Store the single-limb remainder. We don't check if there's space
+ for just one limb, since no function ever makes zero space. */
+ rem->_mp_size = dividend_size >= 0 ? 1 : -1;
+ rem->_mp_d[0] = remainder_limb;
+ }
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tdiv_ui.c b/rts/gmp/mpz/tdiv_ui.c
new file mode 100644
index 0000000000..7a40a6a7f7
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_ui.c
@@ -0,0 +1,53 @@
+/* mpz_tdiv_ui(dividend, divisor_limb)
+ -- Return DIVDEND mod DIVISOR_LIMB.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_ui (dividend, divisor)
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tstbit.c b/rts/gmp/mpz/tstbit.c
new file mode 100644
index 0000000000..b0a8b0b31a
--- /dev/null
+++ b/rts/gmp/mpz/tstbit.c
@@ -0,0 +1,70 @@
+/* mpz_tstbit -- test a specified bit. Simulate 2's complement representation.
+
+Copyright (C) 1997 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_tstbit (mpz_srcptr d, unsigned long int bit_index)
+#else
+mpz_tstbit (d, bit_index)
+ mpz_srcptr d;
+ unsigned long int bit_index;
+#endif
+{
+ mp_size_t dsize = d->_mp_size;
+ mp_ptr dp = d->_mp_d;
+ mp_size_t limb_index;
+
+ limb_index = bit_index / BITS_PER_MP_LIMB;
+ if (dsize >= 0)
+ {
+ if (limb_index < dsize)
+ return (dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1;
+ else
+ /* Testing a bit outside of a positive number. */
+ return 0;
+ }
+ else
+ {
+ mp_size_t zero_bound;
+
+ dsize = -dsize;
+
+ /* Locate the least significant non-zero limb. */
+ for (zero_bound = 0; dp[zero_bound] == 0; zero_bound++)
+ ;
+
+ if (limb_index > zero_bound)
+ {
+ if (limb_index < dsize)
+ return (~dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1;
+ else
+ /* Testing a bit outside of a negative number. */
+ return 1;
+ }
+ else if (limb_index == zero_bound)
+ return (-dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1;
+ else
+ return 0;
+ }
+}
diff --git a/rts/gmp/mpz/ui_pow_ui.c b/rts/gmp/mpz/ui_pow_ui.c
new file mode 100644
index 0000000000..edd2dee625
--- /dev/null
+++ b/rts/gmp/mpz/ui_pow_ui.c
@@ -0,0 +1,139 @@
+/* mpz_ui_pow_ui(res, base, exp) -- Set RES to BASE**EXP.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static void mpz_pow2 _PROTO ((mpz_ptr r, mp_limb_t blimb, unsigned long int e, mp_limb_t rl));
+
+void
+#if __STDC__
+mpz_ui_pow_ui (mpz_ptr r, unsigned long int b, unsigned long int e)
+#else
+mpz_ui_pow_ui (r, b, e)
+ mpz_ptr r;
+ unsigned long int b;
+ unsigned long int e;
+#endif
+{
+ mp_limb_t blimb = b;
+ mp_limb_t rl;
+
+ if (e == 0)
+ {
+ /* For x^0 we return 1, even if x is 0. */
+ r->_mp_d[0] = 1;
+ r->_mp_size = 1;
+ return;
+ }
+
+ /* Compute b^e as (b^n)^(e div n) * b^(e mod n), where n is chosen such that
+ the latter factor is the largest number small enough to fit in a limb. */
+
+ rl = 1;
+ while (e != 0 && blimb < ((mp_limb_t) 1 << BITS_PER_MP_LIMB/2))
+ {
+ if ((e & 1) != 0)
+ rl = rl * blimb;
+ blimb = blimb * blimb;
+ e = e >> 1;
+ }
+
+ /* rl is now b^(e mod n). (I.e., the latter factor above.) */
+
+ if (e == 0)
+ {
+ r->_mp_d[0] = rl;
+ r->_mp_size = rl != 0;
+ return;
+ }
+
+ mpz_pow2 (r, blimb, e, rl);
+}
+
+/* Multi-precision part of expontialization code. */
+static void
+#if __STDC__
+mpz_pow2 (mpz_ptr r, mp_limb_t blimb, unsigned long int e, mp_limb_t rl)
+#else
+mpz_pow2 (r, blimb, e, rl)
+ mpz_ptr r;
+ mp_limb_t blimb;
+ unsigned long int e;
+ mp_limb_t rl;
+#endif
+{
+ mp_ptr rp, tp;
+ mp_size_t ralloc, rsize;
+ int cnt, i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* Over-estimate temporary space requirements somewhat. */
+ count_leading_zeros (cnt, blimb);
+ ralloc = e - cnt * e / BITS_PER_MP_LIMB + 1;
+
+ /* The two areas are used to alternatingly hold the input and receive the
+ product for mpn_mul. (Needed since mpn_mul_n requires that the product
+ is distinct from either input operand.) */
+ rp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+ tp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+
+ rp[0] = blimb;
+ rsize = 1;
+
+ count_leading_zeros (cnt, e);
+ for (i = BITS_PER_MP_LIMB - cnt - 2; i >= 0; i--)
+ {
+ mpn_mul_n (tp, rp, rp, rsize);
+ rsize = 2 * rsize;
+ rsize -= tp[rsize - 1] == 0;
+ MP_PTR_SWAP (rp, tp);
+
+ if ((e & ((mp_limb_t) 1 << i)) != 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_mul_1 (rp, rp, rsize, blimb);
+ rp[rsize] = cy;
+ rsize += cy != 0;
+ }
+ }
+
+ /* We will need rsize or rsize+1 limbs for the result. */
+ if (r->_mp_alloc <= rsize)
+ _mpz_realloc (r, rsize + 1);
+
+ /* Multiply the two factors (in rp,rsize and rl) and put the final result
+ in place. */
+ {
+ mp_limb_t cy;
+ cy = mpn_mul_1 (r->_mp_d, rp, rsize, rl);
+ (r->_mp_d)[rsize] = cy;
+ rsize += cy != 0;
+ }
+
+ r->_mp_size = rsize;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/urandomb.c b/rts/gmp/mpz/urandomb.c
new file mode 100644
index 0000000000..caca086e05
--- /dev/null
+++ b/rts/gmp/mpz/urandomb.c
@@ -0,0 +1,49 @@
+/* mpz_urandomb (rop, state, n) -- Generate a uniform pseudorandom
+ integer in the range 0 to 2^N - 1, inclusive, using STATE as the
+ random state previously initialized by a call to gmp_randinit().
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_urandomb (mpz_t rop, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+mpz_urandomb (rop, rstate, nbits)
+ mpz_t rop;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ mp_ptr rp;
+ mp_size_t size;
+
+ size = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ if (ALLOC (rop) < size)
+ _mpz_realloc (rop, size);
+
+ rp = PTR (rop);
+
+ _gmp_rand (rp, rstate, nbits);
+ MPN_NORMALIZE (rp, size);
+ SIZ (rop) = size;
+}
diff --git a/rts/gmp/mpz/urandomm.c b/rts/gmp/mpz/urandomm.c
new file mode 100644
index 0000000000..69e1bae78a
--- /dev/null
+++ b/rts/gmp/mpz/urandomm.c
@@ -0,0 +1,78 @@
+/* mpz_urandomm (rop, state, n) -- Generate a uniform pseudorandom
+ integer in the range 0 to N-1, using STATE as the random state
+ previously initialized by a call to gmp_randinit().
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_urandomm (mpz_t rop, gmp_randstate_t rstate, mpz_t n)
+#else
+mpz_urandomm (rop, rstate, n)
+ mpz_t rop;
+ gmp_randstate_t rstate;
+ mpz_t n;
+#endif
+{
+ mpz_t t, p, m;
+ mp_ptr tp;
+ mp_size_t nbits, size;
+ int count;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* FIXME: Should check for n == 0 and report error */
+
+ size = SIZ (n);
+ count_leading_zeros (count, PTR (n)[size - 1]);
+ nbits = size * BITS_PER_MP_LIMB - count;
+
+ /* Allocate enough for any mpz function called since a realloc of
+ these will fail. */
+ MPZ_TMP_INIT (t, size);
+ MPZ_TMP_INIT (m, size + 1);
+ MPZ_TMP_INIT (p, size + 1);
+
+ /* Let m = highest possible random number plus 1. */
+ mpz_set_ui (m, 0);
+ mpz_setbit (m, nbits);
+
+ /* Let p = floor(m / n) * n. */
+ mpz_fdiv_q (p, m, n);
+ mpz_mul (p, p, n);
+
+ tp = PTR (t);
+ do
+ {
+ _gmp_rand (tp, rstate, nbits);
+ MPN_NORMALIZE (tp, size); /* FIXME: Really necessary? */
+ SIZ (t) = size;
+ }
+ while (mpz_cmp (t, p) >= 0);
+
+ mpz_mod (rop, t, n);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/xor.c b/rts/gmp/mpz/xor.c
new file mode 100644
index 0000000000..69898d1791
--- /dev/null
+++ b/rts/gmp/mpz/xor.c
@@ -0,0 +1,217 @@
+/* mpz_xor -- Logical xor.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+#else
+mpz_xor (res, op1, op2)
+ mpz_ptr res;
+ mpz_srcptr op1;
+ mpz_srcptr op2;
+#endif
+{
+ mp_srcptr op1_ptr, op2_ptr;
+ mp_size_t op1_size, op2_size;
+ mp_ptr res_ptr;
+ mp_size_t res_size, res_alloc;
+ mp_size_t i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op1_size = op1->_mp_size;
+ op2_size = op2->_mp_size;
+
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+
+ if (op1_size >= 0)
+ {
+ if (op2_size >= 0)
+ {
+ if (op1_size >= op2_size)
+ {
+ if (res->_mp_alloc < op1_size)
+ {
+ _mpz_realloc (res, op1_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op1_ptr)
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ if (res->_mp_alloc < op2_size)
+ {
+ _mpz_realloc (res, op2_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op2_ptr)
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ MPN_NORMALIZE (res_ptr, res_size);
+ res->_mp_size = res_size;
+ return;
+ }
+ else /* op2_size < 0 */
+ {
+ /* Fall through to the code at the end of the function. */
+ }
+ }
+ else
+ {
+ if (op2_size < 0)
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+
+ /* Both operands are negative, the result will be positive.
+ (-OP1) ^ (-OP2) =
+ = ~(OP1 - 1) ^ ~(OP2 - 1) =
+ = (OP1 - 1) ^ (OP2 - 1) */
+
+ op1_size = -op1_size;
+ op2_size = -op2_size;
+
+ /* Possible optimization: Decrease mpn_sub precision,
+ as we won't use the entire res of both. */
+ opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+ op1_ptr = opx;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ res_alloc = MAX (op1_size, op2_size);
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to
+ temporary space--never to the space RES->_mp_d used
+ to point to before reallocation. */
+ }
+
+ if (op1_size > op2_size)
+ {
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ MPN_NORMALIZE (res_ptr, res_size);
+ res->_mp_size = res_size;
+ TMP_FREE (marker);
+ return;
+ }
+ else
+ {
+ /* We should compute -OP1 ^ OP2. Swap OP1 and OP2 and fall
+ through to the code that handles OP1 ^ -OP2. */
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ }
+ }
+
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+ mp_size_t count;
+
+ /* Operand 2 negative, so will be the result.
+ -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
+ = ~(OP1 ^ ~(OP2 - 1)) + 1 =
+ = (OP1 ^ (OP2 - 1)) + 1 */
+
+ op2_size = -op2_size;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ res_alloc = MAX (op1_size, op2_size) + 1;
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ op1_ptr = op1->_mp_d;
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ if (op1_size > op2_size)
+ {
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+
+ MPN_NORMALIZE (res_ptr, res_size);
+ res->_mp_size = -res_size;
+ TMP_FREE (marker);
+ }
+}
diff --git a/rts/gmp/rand.c b/rts/gmp/rand.c
new file mode 100644
index 0000000000..d1f9354511
--- /dev/null
+++ b/rts/gmp/rand.c
@@ -0,0 +1,171 @@
+/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#if __STDC__
+# include <stdarg.h>
+#else
+# include <varargs.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Array of CL-schemes, ordered in increasing order of the first
+ member (the 'm2exp' value). The end of the array is indicated with
+ an entry containing all zeros. */
+
+/* All multipliers are in the range 0.01*m and 0.99*m, and are
+congruent to 5 (mod 8).
+They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
+(Up to and including 196 bits, merit is >= 3.) */
+
+struct __gmp_rand_lc_scheme_struct
+{
+ unsigned long int m2exp; /* Modulus is 2 ^ m2exp. */
+ char *astr; /* Multiplier in string form. */
+ unsigned long int c; /* Adder. */
+};
+
+struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
+{
+ {32, "43840821", 1},
+ {33, "85943917", 1},
+ {34, "171799469", 1},
+ {35, "343825285", 1},
+ {36, "687285701", 1},
+ {37, "1374564613", 1},
+ {38, "2749193437", 1},
+ {39, "5497652029", 1},
+ {40, "10995212661", 1},
+ {56, "47988680294711517", 1},
+ {64, "13469374875402548381", 1},
+ {100, "203786806069096950756900463357", 1},
+ {128, "96573135900076068624591706046897650309", 1},
+ {156, "43051576988660538262511726153887323360449035333", 1},
+ {196, "1611627857640767981443524165616850972435303571524033586421", 1},
+ {200, "491824250216153841876046962368396460896019632211283945747141", 1},
+ {256, "79336254595106925775099152154558630917988041692672147726148065355845551082677", 1},
+ {0, NULL, 0} /* End of array. */
+};
+
+void
+#if __STDC__
+gmp_randinit (gmp_randstate_t rstate,
+ gmp_randalg_t alg,
+ ...)
+#else
+gmp_randinit (va_alist)
+ va_dcl
+#endif
+{
+ va_list ap;
+#if __STDC__
+#else
+ __gmp_randstate_struct *rstate;
+ gmp_randalg_t alg;
+#endif
+
+#if __STDC__
+ va_start (ap, alg);
+#else
+ va_start (ap);
+
+ rstate = va_arg (ap, __gmp_randstate_struct *);
+ alg = va_arg (ap, gmp_randalg_t);
+#endif
+
+ switch (alg)
+ {
+ case GMP_RAND_ALG_LC: /* Linear congruential. */
+ {
+ unsigned long int size;
+ struct __gmp_rand_lc_scheme_struct *sp;
+ mpz_t a;
+
+ size = va_arg (ap, unsigned long int);
+
+ /* Pick a scheme. */
+ for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++)
+ if (sp->m2exp / 2 >= size)
+ break;
+
+ if (sp->m2exp == 0) /* Nothing big enough found. */
+ {
+ gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
+ return;
+ }
+
+ /* Install scheme. */
+ mpz_init_set_str (a, sp->astr, 0);
+ gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp);
+ mpz_clear (a);
+ break;
+ }
+
+#if 0
+ case GMP_RAND_ALG_BBS: /* Blum, Blum, and Shub. */
+ {
+ mpz_t p, q;
+ mpz_t ztmp;
+
+ /* FIXME: Generate p and q. They must be ``large'' primes,
+ congruent to 3 mod 4. Should we ensure that they meet some
+ of the criterias for being ``hard primes''?*/
+
+ /* These are around 128 bits. */
+ mpz_init_set_str (p, "148028650191182616877187862194899201391", 10);
+ mpz_init_set_str (q, "315270837425234199477225845240496832591", 10);
+
+ /* Allocate algorithm specific data. */
+ rstate->data.bbs = (__gmp_rand_data_bbs *)
+ (*_mp_allocate_func) (sizeof (__gmp_rand_data_bbs));
+
+ mpz_init (rstate->data.bbs->bi); /* The Blum integer. */
+ mpz_mul (rstate->data.bbs->bi, p, q);
+
+ /* Find a seed, x, with gcd (x, bi) == 1. */
+ mpz_init (ztmp);
+ while (1)
+ {
+ mpz_gcd (ztmp, seed, rstate->data.bbs->bi);
+ if (!mpz_cmp_ui (ztmp, 1))
+ break;
+ mpz_add_ui (seed, seed, 1);
+ }
+
+ rstate->alg = alg;
+ rstate->size = size; /* FIXME: Remove. */
+ mpz_set (rstate->seed, seed);
+
+ mpz_clear (p);
+ mpz_clear (q);
+ mpz_clear (ztmp);
+ break;
+ }
+#endif /* 0 */
+
+ default: /* Bad choice. */
+ gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+ }
+
+ va_end (ap);
+}
diff --git a/rts/gmp/randclr.c b/rts/gmp/randclr.c
new file mode 100644
index 0000000000..5cb0291165
--- /dev/null
+++ b/rts/gmp/randclr.c
@@ -0,0 +1,54 @@
+/* gmp_randclear (state) -- Clear and deallocate random state STATE.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randclear (gmp_randstate_t rstate)
+#else
+gmp_randclear (rstate)
+ gmp_randstate_t rstate;
+#endif
+{
+ mpz_clear (rstate->seed);
+
+ switch (rstate->alg)
+ {
+ case GMP_RAND_ALG_LC:
+ mpz_clear (rstate->algdata.lc->a);
+ if (rstate->algdata.lc->m2exp == 0)
+ mpz_clear (rstate->algdata.lc->m);
+ (*_mp_free_func) (rstate->algdata.lc, sizeof (*rstate->algdata.lc));
+ break;
+
+#if 0
+ case GMP_RAND_ALG_BBS:
+ mpz_clear (rstate->algdata.bbs->bi);
+ (*_mp_free_func) (rstate->algdata.bbs, sizeof (*rstate->algdata.bbs));
+ break;
+#endif /* 0 */
+
+ default:
+ gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+ }
+}
diff --git a/rts/gmp/randlc.c b/rts/gmp/randlc.c
new file mode 100644
index 0000000000..7079db827e
--- /dev/null
+++ b/rts/gmp/randlc.c
@@ -0,0 +1,56 @@
+/* gmp_randinit_lc (state, a, c, m) -- Initialize a random state for a
+ linear congruential generator with multiplier A, adder C, and
+ modulus M.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randinit_lc (gmp_randstate_t rstate,
+ mpz_t a,
+ unsigned long int c,
+ mpz_t m)
+#else
+gmp_randinit_lc (rstate, a, c, m)
+ gmp_randstate_t rstate;
+ mpz_t a;
+ unsigned long int c;
+ mpz_t m;
+#endif
+{
+ /* FIXME: Not finished. We don't handle this in _gmp_rand() yet. */
+ abort ();
+
+ mpz_init_set_ui (rstate->seed, 1);
+ _mpz_realloc (rstate->seed, ABSIZ (m));
+
+ /* Allocate algorithm specific data. */
+ rstate->algdata.lc = (__gmp_randata_lc *)
+ (*_mp_allocate_func) (sizeof (__gmp_randata_lc));
+
+ mpz_init_set (rstate->algdata.lc->a, a);
+ rstate->algdata.lc->c = c;
+ mpz_init_set (rstate->algdata.lc->m, m);
+
+ rstate->alg = GMP_RAND_ALG_LC;
+}
diff --git a/rts/gmp/randlc2x.c b/rts/gmp/randlc2x.c
new file mode 100644
index 0000000000..dbd5f041ee
--- /dev/null
+++ b/rts/gmp/randlc2x.c
@@ -0,0 +1,59 @@
+/* gmp_randinit_lc_2exp (state, a, c, m2exp) -- Initialize random
+ state STATE for a linear congruential generator with multiplier A,
+ adder C, and modulus 2 ^ M2EXP.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randinit_lc_2exp (gmp_randstate_t rstate,
+ mpz_t a,
+ unsigned long int c,
+ unsigned long int m2exp)
+#else
+gmp_randinit_lc_2exp (rstate, a, c, m2exp)
+ gmp_randstate_t rstate;
+ mpz_t a;
+ unsigned long int c;
+ unsigned long int m2exp;
+#endif
+{
+ mpz_init_set_ui (rstate->seed, 1);
+ _mpz_realloc (rstate->seed, m2exp / BITS_PER_MP_LIMB
+ + (m2exp % BITS_PER_MP_LIMB != 0));
+
+ /* Allocate algorithm specific data. */
+ rstate->algdata.lc = (__gmp_randata_lc *)
+ (*_mp_allocate_func) (sizeof (__gmp_randata_lc));
+
+ mpz_init_set (rstate->algdata.lc->a, a);
+ rstate->algdata.lc->c = c;
+
+ /* Cover weird case where m2exp is 0, which means that m is used
+ instead of m2exp. */
+ if (m2exp == 0)
+ mpz_init_set_ui (rstate->algdata.lc->m, 0);
+ rstate->algdata.lc->m2exp = m2exp;
+
+ rstate->alg = GMP_RAND_ALG_LC;
+}
diff --git a/rts/gmp/randraw.c b/rts/gmp/randraw.c
new file mode 100644
index 0000000000..c0c3889d33
--- /dev/null
+++ b/rts/gmp/randraw.c
@@ -0,0 +1,360 @@
+/* _gmp_rand (rp, state, nbits) -- Generate a random bitstream of
+ length NBITS in RP. RP must have enough space allocated to hold
+ NBITS.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For linear congruential (LC), we use one of algorithms (1) or (2).
+ (gmp-3.0 uses algorithm (1) with 'm' as a power of 2.)
+
+LC algorithm (1).
+
+ X = (aX + c) mod m
+
+[D. Knuth, "The Art of Computer Programming: Volume 2, Seminumerical Algorithms",
+Third Edition, Addison Wesley, 1998, pp. 184-185.]
+
+ X is the seed and the result
+ a is chosen so that
+ a mod 8 = 5 [3.2.1.2] and [3.2.1.3]
+ .01m < a < .99m
+ its binary or decimal digits is not a simple, regular pattern
+ it has no large quotients when Euclid's algorithm is used to find
+ gcd(a, m) [3.3.3]
+ it passes the spectral test [3.3.4]
+ it passes several tests of [3.3.2]
+ c has no factor in common with m (c=1 or c=a can be good)
+ m is large (2^30)
+ is a power of 2 [3.2.1.1]
+
+The least significant digits of the generated number are not very
+random. It should be regarded as a random fraction X/m. To get a
+random integer between 0 and n-1, multiply X/m by n and truncate.
+(Don't use X/n [ex 3.4.1-3])
+
+The ``accuracy'' in t dimensions is one part in ``the t'th root of m'' [3.3.4].
+
+Don't generate more than about m/1000 numbers without changing a, c, or m.
+
+The sequence length depends on chosen a,c,m.
+
+
+LC algorithm (2).
+
+ X = a * (X mod q) - r * (long) (X/q)
+ if X<0 then X+=m
+
+[Knuth, pp. 185-186.]
+
+ X is the seed and the result
+ as a seed is nonzero and less than m
+ a is a primitive root of m (which means that a^2 <= m)
+ q is (long) m / a
+ r is m mod a
+ m is a prime number near the largest easily computed integer
+
+which gives
+
+ X = a * (X % ((long) m / a)) -
+ (M % a) * ((long) (X / ((long) m / a)))
+
+Since m is prime, the least-significant bits of X are just as random as
+the most-significant bits. */
+
+/* Blum, Blum, and Shub.
+
+ [Bruce Schneier, "Applied Cryptography", Second Edition, John Wiley
+ & Sons, Inc., 1996, pp. 417-418.]
+
+ "Find two large prime numbers, p and q, which are congruent to 3
+ modulo 4. The product of those numbers, n, is a blum integer.
+ Choose another random integer, x, which is relatively prime to n.
+ Compute
+ x[0] = x^2 mod n
+ That's the seed for the generator."
+
+ To generate a random bit, compute
+ x[i] = x[i-1]^2 mod n
+ The least significant bit of x[i] is the one we want.
+
+ We can use more than one bit from x[i], namely the
+ log2(bitlength of x[i])
+ least significant bits of x[i].
+
+ So, for a 32-bit seed we get 5 bits per computation.
+
+ The non-predictability of this generator is based on the difficulty
+ of factoring n.
+ */
+
+/* -------------------------------------------------- */
+
+/* lc (rp, state) -- Generate next number in LC sequence. Return the
+ number of valid bits in the result. NOTE: If 'm' is a power of 2
+ (m2exp != 0), discard the lower half of the result. */
+
+static
+unsigned long int
+#if __STDC__
+lc (mp_ptr rp, gmp_randstate_t rstate)
+#else
+lc (rp, rstate)
+ mp_ptr rp;
+ gmp_randstate_t rstate;
+#endif
+{
+ mp_ptr tp, seedp, ap;
+ mp_size_t ta;
+ mp_size_t tn, seedn, an;
+ mp_size_t retval;
+ int shiftcount = 0;
+ unsigned long int m2exp;
+ mp_limb_t c;
+ TMP_DECL (mark);
+
+ m2exp = rstate->algdata.lc->m2exp;
+ c = (mp_limb_t) rstate->algdata.lc->c;
+
+ seedp = PTR (rstate->seed);
+ seedn = SIZ (rstate->seed);
+
+ if (seedn == 0)
+ {
+ /* Seed is 0. Result is C % M. */
+ *rp = c;
+
+ if (m2exp != 0)
+ {
+ /* M is a power of 2. */
+ if (m2exp < BITS_PER_MP_LIMB)
+ {
+ /* Only necessary when M may be smaller than C. */
+ *rp &= (((mp_limb_t) 1 << m2exp) - 1);
+ }
+ }
+ else
+ {
+ /* M is not a power of 2. */
+ abort (); /* FIXME. */
+ }
+
+ /* Save result as next seed. */
+ *seedp = *rp;
+ SIZ (rstate->seed) = 1;
+ return BITS_PER_MP_LIMB;
+ }
+
+ ap = PTR (rstate->algdata.lc->a);
+ an = SIZ (rstate->algdata.lc->a);
+
+ /* Allocate temporary storage. Let there be room for calculation of
+ (A * seed + C) % M, or M if bigger than that. */
+
+ ASSERT_ALWAYS (m2exp != 0); /* FIXME. */
+
+ TMP_MARK (mark);
+ ta = an + seedn + 1;
+ tp = (mp_ptr) TMP_ALLOC (ta * BYTES_PER_MP_LIMB);
+ MPN_ZERO (tp, ta);
+
+ /* t = a * seed */
+ if (seedn >= an)
+ mpn_mul_basecase (tp, seedp, seedn, ap, an);
+ else
+ mpn_mul_basecase (tp, ap, an, seedp, seedn);
+ tn = an + seedn;
+
+ /* t = t + c */
+ mpn_incr_u (tp, c);
+
+ /* t = t % m */
+ if (m2exp != 0)
+ {
+ /* M is a power of 2. The mod operation is trivial. */
+
+ tp[m2exp / BITS_PER_MP_LIMB] &= ((mp_limb_t) 1 << m2exp % BITS_PER_MP_LIMB) - 1;
+ tn = (m2exp + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ }
+ else
+ {
+ abort (); /* FIXME. */
+ }
+
+ /* Save result as next seed. */
+ MPN_COPY (PTR (rstate->seed), tp, tn);
+ SIZ (rstate->seed) = tn;
+
+ if (m2exp != 0)
+ {
+ /* Discard the lower half of the result. */
+ unsigned long int discardb = m2exp / 2;
+ mp_size_t discardl = discardb / BITS_PER_MP_LIMB;
+
+ tn -= discardl;
+ if (tn > 0)
+ {
+ if (discardb % BITS_PER_MP_LIMB != 0)
+ {
+ mpn_rshift (tp, tp + discardl, tn, discardb % BITS_PER_MP_LIMB);
+ MPN_COPY (rp, tp, (discardb + BITS_PER_MP_LIMB -1) / BITS_PER_MP_LIMB);
+ }
+ else /* Even limb boundary. */
+ MPN_COPY_INCR (rp, tp + discardl, tn);
+ }
+ }
+ else
+ {
+ MPN_COPY (rp, tp, tn);
+ }
+
+ TMP_FREE (mark);
+
+ /* Return number of valid bits in the result. */
+ if (m2exp != 0)
+ retval = (m2exp + 1) / 2;
+ else
+ retval = SIZ (rstate->algdata.lc->m) * BITS_PER_MP_LIMB - shiftcount;
+ return retval;
+}
+
+#ifdef RAWRANDEBUG
+/* Set even bits to EVENBITS and odd bits to ! EVENBITS in RP.
+ Number of bits is m2exp in state. */
+/* FIXME: Remove. */
+unsigned long int
+lc_test (mp_ptr rp, gmp_randstate_t s, const int evenbits)
+{
+ unsigned long int rn, nbits;
+ int f;
+
+ nbits = s->algdata.lc->m2exp / 2;
+ rn = nbits / BITS_PER_MP_LIMB + (nbits % BITS_PER_MP_LIMB != 0);
+ MPN_ZERO (rp, rn);
+
+ for (f = 0; f < nbits; f++)
+ {
+ mpn_lshift (rp, rp, rn, 1);
+ if (f % 2 == ! evenbits)
+ rp[0] += 1;
+ }
+
+ return nbits;
+}
+#endif /* RAWRANDEBUG */
+
+void
+#if __STDC__
+_gmp_rand (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+_gmp_rand (rp, rstate, nbits)
+ mp_ptr rp;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ mp_size_t rn; /* Size of R. */
+
+ rn = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+
+ switch (rstate->alg)
+ {
+ case GMP_RAND_ALG_LC:
+ {
+ unsigned long int rbitpos;
+ int chunk_nbits;
+ mp_ptr tp;
+ mp_size_t tn;
+ TMP_DECL (lcmark);
+
+ TMP_MARK (lcmark);
+
+ chunk_nbits = rstate->algdata.lc->m2exp / 2;
+ tn = (chunk_nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+
+ tp = (mp_ptr) TMP_ALLOC (tn * BYTES_PER_MP_LIMB);
+
+ rbitpos = 0;
+ while (rbitpos + chunk_nbits <= nbits)
+ {
+ mp_ptr r2p = rp + rbitpos / BITS_PER_MP_LIMB;
+
+ if (rbitpos % BITS_PER_MP_LIMB != 0)
+ {
+ mp_limb_t savelimb, rcy;
+ /* Target of of new chunk is not bit aligned. Use temp space
+ and align things by shifting it up. */
+ lc (tp, rstate);
+ savelimb = r2p[0];
+ rcy = mpn_lshift (r2p, tp, tn, rbitpos % BITS_PER_MP_LIMB);
+ r2p[0] |= savelimb;
+/* bogus */ if ((chunk_nbits % BITS_PER_MP_LIMB + rbitpos % BITS_PER_MP_LIMB)
+ > BITS_PER_MP_LIMB)
+ r2p[tn] = rcy;
+ }
+ else
+ {
+ /* Target of of new chunk is bit aligned. Let `lc' put bits
+ directly into our target variable. */
+ lc (r2p, rstate);
+ }
+ rbitpos += chunk_nbits;
+ }
+
+ /* Handle last [0..chunk_nbits) bits. */
+ if (rbitpos != nbits)
+ {
+ mp_ptr r2p = rp + rbitpos / BITS_PER_MP_LIMB;
+ int last_nbits = nbits - rbitpos;
+ tn = (last_nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ lc (tp, rstate);
+ if (rbitpos % BITS_PER_MP_LIMB != 0)
+ {
+ mp_limb_t savelimb, rcy;
+ /* Target of of new chunk is not bit aligned. Use temp space
+ and align things by shifting it up. */
+ savelimb = r2p[0];
+ rcy = mpn_lshift (r2p, tp, tn, rbitpos % BITS_PER_MP_LIMB);
+ r2p[0] |= savelimb;
+ if (rbitpos + tn * BITS_PER_MP_LIMB - rbitpos % BITS_PER_MP_LIMB < nbits)
+ r2p[tn] = rcy;
+ }
+ else
+ {
+ MPN_COPY (r2p, tp, tn);
+ }
+ /* Mask off top bits if needed. */
+ if (nbits % BITS_PER_MP_LIMB != 0)
+ rp[nbits / BITS_PER_MP_LIMB]
+ &= ~ ((~(mp_limb_t) 0) << nbits % BITS_PER_MP_LIMB);
+ }
+
+ TMP_FREE (lcmark);
+ break;
+ }
+
+ default:
+ gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+ break;
+ }
+}
diff --git a/rts/gmp/randsd.c b/rts/gmp/randsd.c
new file mode 100644
index 0000000000..3bed14b578
--- /dev/null
+++ b/rts/gmp/randsd.c
@@ -0,0 +1,37 @@
+/* gmp_randseed (state, seed) -- Set initial seed SEED in random state
+ STATE.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randseed (gmp_randstate_t rstate,
+ mpz_t seed)
+#else
+gmp_randseed (rstate, seed)
+ gmp_randstate_t rstate;
+ mpz_t seed;
+#endif
+{
+ mpz_set (rstate->seed, seed);
+}
diff --git a/rts/gmp/randsdui.c b/rts/gmp/randsdui.c
new file mode 100644
index 0000000000..92f412f3ea
--- /dev/null
+++ b/rts/gmp/randsdui.c
@@ -0,0 +1,37 @@
+/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
+ state STATE.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randseed_ui (gmp_randstate_t rstate,
+ unsigned long int seed)
+#else
+gmp_randseed_ui (rstate, seed)
+ gmp_randstate_t rstate;
+ mpz_t seed;
+#endif
+{
+ mpz_set_ui (rstate->seed, seed);
+}
diff --git a/rts/gmp/stack-alloc.c b/rts/gmp/stack-alloc.c
new file mode 100644
index 0000000000..9ab98fe5f9
--- /dev/null
+++ b/rts/gmp/stack-alloc.c
@@ -0,0 +1,136 @@
+/* Stack allocation routines. This is intended for machines without support
+ for the `alloca' function.
+
+Copyright (C) 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "stack-alloc.h"
+
+#define __need_size_t
+#include <stddef.h>
+#undef __need_size_t
+
+/* gmp-impl.h and stack-alloc.h conflict when not USE_STACK_ALLOC, so these
+ declarations are copied here */
+#if __STDC__
+extern void * (*__gmp_allocate_func) (size_t);
+extern void (*__gmp_free_func) (void *, size_t);
+#else
+extern void * (*__gmp_allocate_func) ();
+extern void (*__gmp_free_func) ();
+#endif
+
+typedef struct tmp_stack tmp_stack;
+
+static unsigned long max_total_allocation = 0;
+static unsigned long current_total_allocation = 0;
+
+static tmp_stack xxx = {&xxx, &xxx, 0};
+static tmp_stack *current = &xxx;
+
+/* The rounded size of the header of each allocation block. */
+#define HSIZ ((sizeof (tmp_stack) + __TMP_ALIGN - 1) & -__TMP_ALIGN)
+
+/* Allocate a block of exactly <size> bytes. This should only be called
+ through the TMP_ALLOC macro, which takes care of rounding/alignment. */
+void *
+#if __STDC__
+__gmp_tmp_alloc (unsigned long size)
+#else
+__gmp_tmp_alloc (size)
+ unsigned long size;
+#endif
+{
+ void *that;
+
+ if (size > (char *) current->end - (char *) current->alloc_point)
+ {
+ void *chunk;
+ tmp_stack *header;
+ unsigned long chunk_size;
+ unsigned long now;
+
+ /* Allocate a chunk that makes the total current allocation somewhat
+ larger than the maximum allocation ever. If size is very large, we
+ allocate that much. */
+
+ now = current_total_allocation + size;
+ if (now > max_total_allocation)
+ {
+ /* We need more temporary memory than ever before. Increase
+ for future needs. */
+ now = now * 3 / 2;
+ chunk_size = now - current_total_allocation + HSIZ;
+ current_total_allocation = now;
+ max_total_allocation = current_total_allocation;
+ }
+ else
+ {
+ chunk_size = max_total_allocation - current_total_allocation + HSIZ;
+ current_total_allocation = max_total_allocation;
+ }
+
+ chunk = (*__gmp_allocate_func) (chunk_size);
+ header = (tmp_stack *) chunk;
+ header->end = (char *) chunk + chunk_size;
+ header->alloc_point = (char *) chunk + HSIZ;
+ header->prev = current;
+ current = header;
+ }
+
+ that = current->alloc_point;
+ current->alloc_point = (char *) that + size;
+ return that;
+}
+
+/* Typically called at function entry. <mark> is assigned so that
+ __gmp_tmp_free can later be used to reclaim all subsequently allocated
+ storage. */
+void
+#if __STDC__
+__gmp_tmp_mark (tmp_marker *mark)
+#else
+__gmp_tmp_mark (mark)
+ tmp_marker *mark;
+#endif
+{
+ mark->which_chunk = current;
+ mark->alloc_point = current->alloc_point;
+}
+
+/* Free everything allocated since <mark> was assigned by __gmp_tmp_mark */
+void
+#if __STDC__
+__gmp_tmp_free (tmp_marker *mark)
+#else
+__gmp_tmp_free (mark)
+ tmp_marker *mark;
+#endif
+{
+ while (mark->which_chunk != current)
+ {
+ tmp_stack *tmp;
+
+ tmp = current;
+ current = tmp->prev;
+ current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);
+ (*__gmp_free_func) (tmp, (char *) tmp->end - (char *) tmp);
+ }
+ current->alloc_point = mark->alloc_point;
+}
diff --git a/rts/gmp/stack-alloc.h b/rts/gmp/stack-alloc.h
new file mode 100644
index 0000000000..f59beec266
--- /dev/null
+++ b/rts/gmp/stack-alloc.h
@@ -0,0 +1,64 @@
+/* Stack allocation routines. This is intended for machines without support
+ for the `alloca' function.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+struct tmp_stack
+{
+ void *end;
+ void *alloc_point;
+ struct tmp_stack *prev;
+};
+
+struct tmp_marker
+{
+ struct tmp_stack *which_chunk;
+ void *alloc_point;
+};
+
+typedef struct tmp_marker tmp_marker;
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if __STDC__
+void *__gmp_tmp_alloc (unsigned long);
+void __gmp_tmp_mark (tmp_marker *);
+void __gmp_tmp_free (tmp_marker *);
+#else
+void *__gmp_tmp_alloc ();
+void __gmp_tmp_mark ();
+void __gmp_tmp_free ();
+#endif
+
+#if defined (__cplusplus)
+}
+#endif
+
+#ifndef __TMP_ALIGN
+#define __TMP_ALIGN 8
+#endif
+
+#define TMP_DECL(marker) tmp_marker marker
+#define TMP_ALLOC(size) \
+ __gmp_tmp_alloc (((unsigned long) (size) + __TMP_ALIGN - 1) & -__TMP_ALIGN)
+#define TMP_MARK(marker) __gmp_tmp_mark (&marker)
+#define TMP_FREE(marker) __gmp_tmp_free (&marker)
diff --git a/rts/gmp/stamp-h.in b/rts/gmp/stamp-h.in
new file mode 100644
index 0000000000..9788f70238
--- /dev/null
+++ b/rts/gmp/stamp-h.in
@@ -0,0 +1 @@
+timestamp
diff --git a/rts/gmp/stamp-vti b/rts/gmp/stamp-vti
new file mode 100644
index 0000000000..e3186186b2
--- /dev/null
+++ b/rts/gmp/stamp-vti
@@ -0,0 +1,3 @@
+@set UPDATED 5 October 2000
+@set EDITION 3.1.1
+@set VERSION 3.1.1
diff --git a/rts/gmp/urandom.h b/rts/gmp/urandom.h
new file mode 100644
index 0000000000..313479e8b7
--- /dev/null
+++ b/rts/gmp/urandom.h
@@ -0,0 +1,86 @@
+/* urandom.h -- define urandom returning a full unsigned long random value.
+
+Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#if defined (__hpux) || defined (__svr4__) || defined (__SVR4)
+/* HPUX lacks random(). */
+static inline mp_limb_t
+urandom ()
+{
+ return mrand48 ();
+}
+#define __URANDOM
+#endif
+
+#if defined(_WIN32) && !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+/* MS CRT supplies just the poxy rand(), with an upper bound of 0x7fff */
+static inline unsigned long
+urandom ()
+{
+ return rand () ^ (rand () << 16) ^ (rand() << 32);
+}
+#define __URANDOM
+#endif
+
+#if defined (__alpha) && !defined (__URANDOM)
+/* DEC OSF/1 1.2 random() returns a double. */
+long mrand48 ();
+static inline mp_limb_t
+urandom ()
+{
+ return mrand48 () | (mrand48 () << 32);
+}
+#define __URANDOM
+#endif
+
+#if BITS_PER_MP_LIMB == 32 && !defined (__URANDOM)
+#if defined (__cplusplus)
+extern "C" {
+#endif
+long random ();
+#if defined (__cplusplus)
+}
+#endif
+static inline mp_limb_t
+urandom ()
+{
+ /* random() returns 31 bits, we want 32. */
+ return random () ^ (random () << 1);
+}
+#define __URANDOM
+#endif
+
+#if BITS_PER_MP_LIMB == 64 && !defined (__URANDOM)
+#if defined (__cplusplus)
+extern "C" {
+#endif
+long random ();
+#if defined (__cplusplus)
+}
+#endif
+static inline mp_limb_t
+urandom ()
+{
+ /* random() returns 31 bits, we want 64. */
+ return random () ^ ((mp_limb_t) random () << 31) ^ ((mp_limb_t) random () << 62);
+}
+#define __URANDOM
+#endif
+
diff --git a/rts/gmp/version.c b/rts/gmp/version.c
new file mode 100644
index 0000000000..9d544ee1d8
--- /dev/null
+++ b/rts/gmp/version.c
@@ -0,0 +1,26 @@
+/* gmp_version -- version number compiled into the library */
+
+/*
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const char *gmp_version = VERSION;
diff --git a/rts/gmp/version.texi b/rts/gmp/version.texi
new file mode 100644
index 0000000000..e3186186b2
--- /dev/null
+++ b/rts/gmp/version.texi
@@ -0,0 +1,3 @@
+@set UPDATED 5 October 2000
+@set EDITION 3.1.1
+@set VERSION 3.1.1