1 | dnl PowerPC-64 mpn_gcd_1. |
---|
2 | |
---|
3 | dnl Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc. |
---|
4 | |
---|
5 | dnl This file is part of the GNU MP Library. |
---|
6 | dnl |
---|
7 | dnl The GNU MP Library is free software; you can redistribute it and/or modify |
---|
8 | dnl it under the terms of either: |
---|
9 | dnl |
---|
10 | dnl * the GNU Lesser General Public License as published by the Free |
---|
11 | dnl Software Foundation; either version 3 of the License, or (at your |
---|
12 | dnl option) any later version. |
---|
13 | dnl |
---|
14 | dnl or |
---|
15 | dnl |
---|
16 | dnl * the GNU General Public License as published by the Free Software |
---|
17 | dnl Foundation; either version 2 of the License, or (at your option) any |
---|
18 | dnl later version. |
---|
19 | dnl |
---|
20 | dnl or both in parallel, as here. |
---|
21 | dnl |
---|
22 | dnl The GNU MP Library is distributed in the hope that it will be useful, but |
---|
23 | dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
24 | dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
25 | dnl for more details. |
---|
26 | dnl |
---|
27 | dnl You should have received copies of the GNU General Public License and the |
---|
28 | dnl GNU Lesser General Public License along with the GNU MP Library. If not, |
---|
29 | dnl see https://www.gnu.org/licenses/. |
---|
30 | |
---|
31 | include(`../config.m4') |
---|
32 | |
---|
33 | C cycles/bit (approx) |
---|
34 | C POWER3/PPC630 ? |
---|
35 | C POWER4/PPC970 8.5 |
---|
36 | C POWER5 ? |
---|
37 | C POWER6 10.1 |
---|
38 | C POWER7 9.4 |
---|
39 | C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1 |
---|
40 | |
---|
41 | C INPUT PARAMETERS |
---|
42 | define(`up', `r3') C pointer to the limbs of U; only 0(up) is read in this file |
---|
43 | define(`n', `r4') C handed to the reduction call unchanged; the body names it r4 |
---|
44 | define(`v0', `r5') C single-limb operand; the body names it r5 |
---|
45 | C Reduction routines; the code picks one by comparing r4 with a threshold |
---|
46 | EXTERN_FUNC(mpn_mod_1) |
---|
47 | EXTERN_FUNC(mpn_modexact_1c_odd) |
---|
48 | C mpn_gcd_1: reduce U by the odd part of v0 with an external call, then run a mask-select binary gcd loop on the one-limb result; answer in r3 |
---|
49 | ASM_START() |
---|
50 | PROLOGUE(mpn_gcd_1,toc) |
---|
51 | mflr r0 C r0 = return address from LR |
---|
52 | std r30, -16(r1) C save r30, used below to hold d across the call |
---|
53 | std r31, -8(r1) C save r31, used below to hold the common twos count |
---|
54 | std r0, 16(r1) C save return address |
---|
55 | stdu r1, -128(r1) C allocate a 128-byte frame and store the back chain |
---|
56 | C r31 = number of trailing zero bits of (v0 OR low limb of U) |
---|
57 | ld r7, 0(up) C U low limb |
---|
58 | or r0, r5, r7 C x | y |
---|
59 | |
---|
60 | neg r6, r0 C r6 = -r0 |
---|
61 | and r6, r6, r0 C isolate the lowest set bit |
---|
62 | cntlzd r31, r6 C common twos |
---|
63 | subfic r31, r31, 63 C r31 = 63 - clz = trailing zero count |
---|
64 | C Strip trailing zeros from v0; the odd part goes in r5 (call argument) and r30 |
---|
65 | neg r6, r5 C r6 = -v0 |
---|
66 | and r6, r6, r5 C lowest set bit of v0 |
---|
67 | cntlzd r8, r6 |
---|
68 | subfic r8, r8, 63 C r8 = trailing zero count of v0 |
---|
69 | srd r5, r5, r8 C r5 = odd part of v0 (assuming v0 is nonzero) |
---|
70 | mr r30, r5 C v0 saved |
---|
71 | C Reduce U by the odd v0; r3 and r4 still hold the incoming up and n |
---|
72 | cmpdi r4, BMOD_1_TO_MOD_1_THRESHOLD C signed compare of n with the threshold |
---|
73 | blt L(bmod) C n below threshold: use mpn_modexact_1c_odd |
---|
74 | CALL( mpn_mod_1) |
---|
75 | b L(reduced) |
---|
76 | L(bmod): |
---|
77 | li r6, 0 C fourth argument = 0; presumably the carry-in, TODO confirm against callee |
---|
78 | CALL( mpn_modexact_1c_odd) |
---|
79 | L(reduced): |
---|
80 | C From here r3 = value returned by the call (a) and r30 = odd v0 (d); loop temporaries are named below |
---|
81 | define(`mask', `r0')dnl |
---|
82 | define(`a1', `r4')dnl |
---|
83 | define(`a2', `r5')dnl |
---|
84 | define(`d1', `r6')dnl |
---|
85 | define(`d2', `r7')dnl |
---|
86 | define(`cnt', `r9')dnl |
---|
87 | C cnt = trailing zero count of a; if a is 0 the loop is skipped entirely |
---|
88 | neg. r6, r3 C record form: cr0 eq is set when r3 is 0 |
---|
89 | and r6, r6, r3 C lowest set bit of a |
---|
90 | cntlzd cnt, r6 |
---|
91 | subfic cnt, cnt, 63 C cnt = trailing zero count of a |
---|
92 | li r12, 63 C constant used at L(top) to turn clz into a trailing zero count |
---|
93 | bne L(mid) C a nonzero (cr0 from neg. above): enter the loop |
---|
94 | b L(end) C a is 0: result is d shifted left by the common twos |
---|
95 | C At L(mid): r3 = a is nonzero, r30 = d, cnt = trailing zero count of a; d stays odd if v0 was nonzero |
---|
96 | ALIGN(16) |
---|
97 | L(top): |
---|
98 | and a1, r10, mask C d - a if a < d, else 0 |
---|
99 | andc a2, r11, mask C a - d if a >= d, else 0 |
---|
100 | and d1, r3, mask C a if a < d, else 0 |
---|
101 | andc d2, r30, mask C d if a >= d, else 0 |
---|
102 | or r3, a1, a2 C new a = absolute difference of a and d |
---|
103 | subf cnt, cnt, r12 C cnt = 63 - clz = trailing zero count of new a |
---|
104 | or r30, d1, d2 C new d = smaller of old a and old d |
---|
105 | L(mid): srd r3, r3, cnt C shift out the low zero bits of a |
---|
106 | sub. r10, r30, r3 C r10 = d - a; record form sets cr0 for the bne below |
---|
107 | subc r11, r3, r30 C r11 = a - d; CA = 1 iff a >= d (unsigned) |
---|
108 | neg r8, r10 |
---|
109 | and r8, r8, r10 C lowest set bit of d - a, same bit as in a - d |
---|
110 | subfe mask, mask, mask C mask = CA - 1: 0 if a >= d, all ones if a < d |
---|
111 | cntlzd cnt, r8 C clz of that bit; converted at L(top) |
---|
112 | bne L(top) C loop while a != d (cr0 from sub. above) |
---|
113 | C Reached with a = d, or directly when a was 0; r30 holds the odd result |
---|
114 | L(end): sld r3, r30, r31 C return value = d shifted left by the common twos |
---|
115 | C Tear down the frame and restore LR, r30 and r31 |
---|
116 | addi r1, r1, 128 C pop the 128-byte frame |
---|
117 | ld r0, 16(r1) C reload return address |
---|
118 | ld r30, -16(r1) C restore r30 |
---|
119 | ld r31, -8(r1) C restore r31 |
---|
120 | mtlr r0 |
---|
121 | blr |
---|
122 | EPILOGUE() |
---|