1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 // Register usage (z13 convention):
8 // R2 = rp (result pointer)
9 // R3 = ap (source pointer)
10 // R4 = an / idx (loop counter)
11 // R5 = b0 (multiplier limb)
12 // R6 = cy (carry)
13
14 #include "textflag.h"
15
16 // func addMulVVW1024(z, x *uint, y uint) (c uint)
17 TEXT ·addMulVVW1024(SB), $0-32 // entry point for 1024-bit operands; $0-32 = no local frame, 32 bytes of arguments plus result
18 MOVD $16, R4 // limb count handed to the shared body in R4: 16 limbs of 8 bytes = 1024 bits
19 JMP addMulVVWx(SB) // tail-jump (not a call): the shared body reads z/x/y and writes c through this function's argument slots, then returns to our caller
20
21 // func addMulVVW1536(z, x *uint, y uint) (c uint)
22 TEXT ·addMulVVW1536(SB), $0-32 // entry point for 1536-bit operands; $0-32 = no local frame, 32 bytes of arguments plus result
23 MOVD $24, R4 // limb count handed to the shared body in R4: 24 limbs of 8 bytes = 1536 bits
24 JMP addMulVVWx(SB) // tail-jump (not a call): the shared body reads z/x/y and writes c through this function's argument slots, then returns to our caller
25
26 // func addMulVVW2048(z, x *uint, y uint) (c uint)
27 TEXT ·addMulVVW2048(SB), $0-32 // entry point for 2048-bit operands; $0-32 = no local frame, 32 bytes of arguments plus result
28 MOVD $32, R4 // limb count handed to the shared body in R4: 32 limbs of 8 bytes = 2048 bits
29 JMP addMulVVWx(SB) // tail-jump (not a call): the shared body reads z/x/y and writes c through this function's argument slots, then returns to our caller
30
31 TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0 // shared body: z[i] += x[i]*y over R4 limbs, final carry stored to c; entered by JMP from the sized wrappers with the limb count already in R4
32 MOVD z+0(FP), R2 // R2 = z, the limb array that is both read and written
33 MOVD x+8(FP), R3 // R3 = x, the limb array that is only read
34 MOVD y+16(FP), R5 // R5 = y, the single multiplier limb
35
36 MOVD $0, R6 // R6 = incoming carry limb, initially zero
37
38 L_ent: // label is not a branch target anywhere in this view
39 VZERO V0 // V0 = carry of the first 128-bit add chain (VACQ/VACCCQ with V6 and V1)
40 VZERO V2 // V2 = carry of the second 128-bit add chain (VACQ/VACCCQ with V5 and V7)
41 SRD $2, R4, R10 // R10 = limb count / 4; used as the BRCTG loop counter (each L_top+L_mid pass handles four limbs)
42 TMLL R4, $1 // test bit 0 of the limb count
43 BRC $8, L_bx0 // mask 8 = condition code 0 (tested bit is zero): even count, go to L_bx0
44
45 L_bx1: // odd limb count: one leading limb is handled with scalar code before the paired code
46 VLEG $1, 0(R2), V2 // load z[0] into doubleword element 1 of V2 (element 0 stays zero from VZERO)
47 VZERO V4 // V4 will receive the low product limb in element 1
48 TMLL R4, $2 // test bit 1 of the limb count
49 BRC $7, L_b11 // mask 7 = condition code non-zero (bit set): count mod 4 == 3
50
51 L_b01: // count mod 4 == 1
52 MOVD $-24, R4 // R4 becomes a biased byte index: 32(R4) at L_cj0 then addresses limb 1
53 MOVD R6, R0 // save the carry-in; R6 is about to be overwritten by the product
54 MOVD 0(R3), R7 // R7 = x[0]
55 MLGR R5, R6 // 64x64->128 multiply into the even/odd pair: R6:R7 = R7 * R5 (R6 = high, R7 = low)
56 ADDC R0, R7 // low limb += carry-in, recording the carry out
57 MOVD $0, R0
58 ADDE R0, R6 // high limb += carry out of the previous add
59 VLVGG $1, R7, V4 // place the low limb in doubleword element 1 of V4
60 VAQ V2, V4, V2 // 128-bit add: V2 = z[0] + low limb; any overflow lands in element 0
61 VSTEG $1, V2, 0(R2) // store element 1 back to z[0]
62 VMRHG V2, V2, V2 // copy element 0 (the overflow of the add above) into both halves so V2 can serve as the chain carry
63 CMPBEQ R10, $0, L_1 // count was exactly 1: nothing more to multiply, finish at L_1
64 BR L_cj0 // otherwise continue with limb pairs; R6 carries the high limb forward
65
66 L_b11: // count mod 4 == 3
67 MOVD $-8, R4 // biased byte index: 16(R4) at L_cj1 then addresses limb 1
68 MOVD 0(R3), R9 // R9 = x[0]
69 MLGR R5, R8 // R8:R9 = R9 * R5 (R8 = high, R9 = low)
70 ADDC R6, R9 // low limb += carry-in, recording the carry out
71 MOVD $0, R6
72 ADDE R6, R8 // high limb += carry out; R8 is the carry limb L_cj1 consumes
73 VLVGG $1, R9, V4 // place the low limb in doubleword element 1 of V4
74 VAQ V2, V4, V2 // 128-bit add: V2 = z[0] + low limb; any overflow lands in element 0
75 VSTEG $1, V2, 0(R2) // store element 1 back to z[0]
76 VMRHG V2, V2, V2 // copy element 0 (the overflow of the add above) into both halves so V2 can serve as the chain carry
77 BR L_cj1
78
79 L_bx0: // even limb count
80 TMLL R4, $2 // test bit 1 of the limb count
81 BRC $7, L_b10 // bit set: count mod 4 == 2
82
83 L_b00: // count mod 4 == 0; the only path reached for the counts 16, 24 and 32 set by the wrappers in this file
84 MOVD $-32, R4 // biased byte index: 32(R4) at L_cj0 addresses limb 0
85
86 L_cj0: // pipeline prologue that joins the loop at L_mid
87 MOVD 32(R3)(R4), R1 // R1 = first x limb of the pair
88 MOVD 40(R3)(R4), R9 // R9 = second x limb of the pair
89 MLGR R5, R0 // R0:R1 = R1 * R5
90 MLGR R5, R8 // R8:R9 = R9 * R5; R8 (high) becomes the carry limb for the next pair
91 VL 32(R4)(R2), V1 // V1 = the two matching z limbs
92 VPDI $4, V1, V1, V1 // swap the two doublewords of V1; presumably so the lower-addressed limb is the low half of the 128-bit add
93 VLVGP R0, R1, V6 // V6 = first product packed from the register pair R0, R1
94 VLVGP R9, R6, V7 // V7 = low limb of the second product paired with the carry limb R6
95 BR L_mid
96
97 L_b10: // count mod 4 == 2
98 MOVD $-16, R4 // biased byte index: 16(R4) at L_cj1 addresses limb 0
99 MOVD R6, R8 // L_cj1 takes its carry limb from R8 (and overwrites R6), so move the carry-in there
100
101 L_cj1: // pipeline prologue that joins the loop at L_top (or goes straight to L_end)
102 MOVD 16(R4)(R3), R1 // R1 = first x limb of the pair
103 MOVD 24(R4)(R3), R7 // R7 = second x limb of the pair
104 MLGR R5, R0 // R0:R1 = R1 * R5
105 MLGR R5, R6 // R6:R7 = R7 * R5; R6 (high) becomes the carry limb for the next pair
106 VL 16(R4)(R2), V1 // V1 = the two matching z limbs
107 VPDI $4, V1, V1, V1 // swap the two doublewords of V1 (see L_cj0)
108 VLVGP R0, R1, V6 // V6 = first product packed from the register pair R0, R1
109 VLVGP R7, R8, V7 // V7 = low limb of the second product paired with the carry limb R8
110 CMPBEQ R10, $0, L_end // no full four-limb groups remain: flush the pending pair at L_end
111
112 L_top: // first half of the loop: start the next pair (carry limb R6) while finishing the pair pending in V1/V6/V7
113 MOVD 32(R4)(R3), R1 // next pair of x limbs
114 MOVD 40(R4)(R3), R9
115 MLGR R5, R0 // R0:R1 = R1 * R5
116 MLGR R5, R8 // R8:R9 = R9 * R5; R8 is the carry limb consumed at L_mid
117 VACQ V6, V1, V0, V5 // V5 = V6 + V1 + carry held in V0 (128-bit add with carry)
118 VACCCQ V6, V1, V0, V0 // V0 = carry out of that same addition
119 VACQ V5, V7, V2, V3 // V3 = V5 + V7 + carry held in V2
120 VACCCQ V5, V7, V2, V2 // V2 = carry out of that same addition
121 VPDI $4, V3, V3, V3 // swap the doublewords back before storing
122 VL 32(R4)(R2), V1 // load the z limbs for the pair just multiplied
123 VPDI $4, V1, V1, V1 // swap the two doublewords of V1 (see L_cj0)
124 VST V3, 16(R4)(R2) // store the finished pair to z
125 VLVGP R0, R1, V6 // V6 = first product of the new pair
126 VLVGP R9, R6, V7 // V7 = low limb of the second product paired with carry limb R6
127
128 L_mid: // second half of the loop: same steps, 16 bytes further on, with the carry limb roles of R6 and R8 swapped
129 MOVD 48(R4)(R3), R1 // next pair of x limbs
130 MOVD 56(R4)(R3), R7
131 MLGR R5, R0 // R0:R1 = R1 * R5
132 MLGR R5, R6 // R6:R7 = R7 * R5; R6 is the carry limb consumed at L_top, or the final high limb at L_1
133 VACQ V6, V1, V0, V5 // V5 = V6 + V1 + carry held in V0
134 VACCCQ V6, V1, V0, V0 // V0 = carry out of that same addition
135 VACQ V5, V7, V2, V3 // V3 = V5 + V7 + carry held in V2
136 VACCCQ V5, V7, V2, V2 // V2 = carry out of that same addition
137 VPDI $4, V3, V3, V3 // swap the doublewords back before storing
138 VL 48(R4)(R2), V1 // load the z limbs for the pair just multiplied
139 VPDI $4, V1, V1, V1 // swap the two doublewords of V1 (see L_cj0)
140 VST V3, 32(R4)(R2) // store the finished pair to z
141 VLVGP R0, R1, V6 // V6 = first product of the new pair
142 VLVGP R7, R8, V7 // V7 = low limb of the second product paired with carry limb R8
143 MOVD $32(R4), R4 // advance the byte index by 32 (four limbs)
144 BRCTG R10, L_top // decrement R10 and loop while it is non-zero
145
146 L_end: // pipeline epilogue: finish the pair still pending in V1/V6/V7
147 VACQ V6, V1, V0, V5 // V5 = V6 + V1 + carry held in V0
148 VACCCQ V6, V1, V0, V0 // V0 = carry out of that same addition
149 VACQ V5, V7, V2, V3 // V3 = V5 + V7 + carry held in V2
150 VACCCQ V5, V7, V2, V2 // V2 = carry out of that same addition
151 VPDI $4, V3, V3, V3 // swap the doublewords back before storing
152 VST V3, 16(R2)(R4) // store the last pair to z
153 VAG V0, V2, V2 // add the carries left in the two chains, per doubleword
154
155 L_1: // also reached directly from L_b01 when the limb count is 1
156 VLGVG $1, V2, R2 // R2 = doubleword element 1 of V2, the pending vector carry (z pointer is no longer needed)
157 ADDC R6, R2 // add the high limb of the last product, held in R6
158 MOVD R2, c+24(FP) // store the result c
159 RET
160
161
View as plain text