summaryrefslogtreecommitdiff
path: root/gmp/mpn/powerpc32/invert_limb.asm
blob: 612bfe523c7413fe63f846ecf7ff66e981efdd93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
dnl  PowerPC-32 mpn_invert_limb -- Invert a normalized limb.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		 cycles/limb
C 603e:		      ?
C 604e:		      ?
C 75x (G3):	      ?
C 7400,7410 (G4):     ?
C 744x,745x (G4+):   32
C power4/ppc970:      ?
C power5:	      ?

EXTERN(approx_tab)

ASM_START()
C mpn_invert_limb -- compute a reciprocal of the limb passed in r3.
C
C In:   r3 = d, the limb to invert.  The table index below keeps bits 30..22
C       and drops bit 31, which fits an input that has bit 31 set; presumably
C       that is what "normalized" means in the file header -- confirm.
C Out:  r3 = result, written by the final subf just before blr.
C       NOTE(review): presumably the usual reciprocal floor((2^64-1)/d) - 2^32;
C       this file does not state the exact contract -- confirm.
C Regs: writes r0, r3, r6, r7, r9, r10, r11 and the carry bit (addc/adde).
C       LEA is a macro defined elsewhere; only its r0 clobber is noted here.
C
C Outline, using the names from the instruction comments; hi32/lo32 are the
C upper/lower 32 bits of an unsigned 64-bit product:
C   w2  = approx_tab[bits 30..22 of d]
C   w1  = (w2 << 4) - 1 - hi32(w2*w2 * ((d >> 11) + 1))
C   d31 = (d >> 1) + (d & 1)
C   e   = (d odd ? w1 >> 1 : 0) - w1 * d31		(mod 2^32)
C   w0  = (w1 << 15) + (hi32(e * w1) >> 1)
C   r3  = w0 - (hi32(w0 * d) + d + carry out of lo32(w0 * d) + d)
C
C Independent computations are interleaved, presumably for scheduling, so
C several registers are reused for unrelated values; keep the order as is.
PROLOGUE(mpn_invert_limb)
	rlwinm	r6, r3, 11, 22, 30	C r6 = 2 * (bits 30..22 of d): byte offset into approx_tab
	srwi	r10, r3, 11		C r10 = d >> 11 (bits 31..11)
	LEA(	r9, approx_tab)		C r9 = table address; N.B. clobbers r0 for ELF and Darwin
	lhzx	r9, r9, r6		C w2 = zero-extended halfword at table + offset
	addi	r0, r10, 1		C r0 = (d >> 11) + 1; first write of r0 comes after LEA
	mullw	r11, r9, r9		C r11 = w2 * w2 (low 32 bits)
	slwi	r9, r9, 4		C r9 = w2 << 4
	mulhwu	r7, r11, r0		C r7 = hi32(w2*w2 * ((d >> 11) + 1))
	rlwinm	r11, r3, 0, 31, 31	C r11 = d & 1 (extract bit 0)
	addi	r0, r9, -1		C r0 = (w2 << 4) - 1
	srwi	r9, r3, 1		C r9 = d >> 1
	subf	r0, r7, r0		C w1 = r0 - r7
	add	r9, r9, r11		C d31 = (d >> 1) + (d & 1)
	mullw	r9, r0, r9		C r9 = w1 * d31 (low 32 bits)
	srwi	r10, r0, 1		C r10 = w1 >> 1
	neg	r11, r11		C r11 = 0 if d is even, all ones if d is odd
	and	r11, r10, r11		C r11 = w1 >> 1 if d is odd, else 0
	subf	r11, r9, r11		C e = r11 - w1 * d31 (mod 2^32)
	mulhwu	r9, r11, r0		C r9 = hi32(e * w1)
	slwi	r0, r0, 15		C r0 = w1 << 15
	srwi	r9, r9, 1		C r9 = hi32(e * w1) >> 1
	add	r0, r9, r0		C w0 = (w1 << 15) + r9
	mullw	r10, r0, r3		C r10 = lo32(w0 * d)
	mulhwu	r9, r0, r3		C r9 = hi32(w0 * d)
	addc	r11, r10, r3		C lo32(w0 * d) + d; only the carry is used, r11 is not read again
	adde	r3, r9, r3		C r3 = hi32(w0 * d) + d + carry
	subf	r3, r3, r0		C result = w0 - r3
	blr
EPILOGUE()

C approx_tab -- 512 halfword values, laid out as 64 rows of 8.
C
C mpn_invert_limb loads one entry with lhzx at byte offset 2*i, where i is
C bits 30..22 of the input limb, and uses it as the starting value w2.
C Entries decrease strictly from 0x7fe1 at i = 0 to 0x4000 at i = 511.
C
C NOTE(review): spot checks at i = 0, 151, 152, 153 and 511 match
C floor((2^24 - 2^14 + 2^9) / (512 + i)).  The generator is not part of this
C file -- confirm the formula before regenerating or editing any entry.
DEF_OBJECT(approx_tab)
	.short 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
	.short 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
	.short 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
	.short 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
	.short 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
	.short 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
	.short 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
	.short 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
	.short 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
	.short 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
	.short 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
	.short 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
	.short 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
	.short 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
	.short 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
	.short 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
	.short 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
	.short 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
	.short 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
	.short 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
	.short 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
	.short 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
	.short 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
	.short 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
	.short 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
	.short 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
	.short 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
	.short 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
	.short 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
	.short 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
	.short 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
	.short 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
	.short 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
	.short 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
	.short 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
	.short 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
	.short 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
	.short 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
	.short 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
	.short 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
	.short 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
	.short 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
	.short 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
	.short 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
	.short 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
	.short 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
	.short 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
	.short 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
	.short 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
	.short 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
	.short 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
	.short 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
	.short 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
	.short 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
	.short 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
	.short 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
	.short 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
	.short 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
	.short 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
	.short 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
	.short 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
	.short 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
	.short 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
	.short 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
END_OBJECT(approx_tab)
ASM_END()