package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"internal/abi"
)

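// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.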
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = ssa.AuxMark
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

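// isFPReg reports whether r is a vector register (X0 through Z31).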
func isFPReg(r int16) bool {
	return x86.REG_X0 <= r && r <= x86.REG_Z31
}

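// isKReg reports whether r is an AVX-512 mask register (K0 through K7).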
func isKReg(r int16) bool {
	return x86.REG_K0 <= r && r <= x86.REG_K7
}

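// isLowFPReg reports whether r is one of the low 16 vector registers
// (X0 through X15), the only ones legacy SSE encodings can reach.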
func isLowFPReg(r int16) bool {
	return x86.REG_X0 <= r && r <= x86.REG_X15
}

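// loadByRegWidth returns the load instruction of the given width
// for the given register.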
func loadByRegWidth(r int16, width int64) obj.As {
	// Narrow integer loads are zero-extended to avoid a
	// partial register write.
	if !isFPReg(r) && !isKReg(r) {
		switch width {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, loads use the same instructions as stores.
	return storeByRegWidth(r, width)
}

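// storeByRegWidth returns the store instruction of the given width
// for the given register.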
func storeByRegWidth(r int16, width int64) obj.As {
	if isFPReg(r) {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		case 16:
			// MOVUPS only encodes X0-X15; higher-numbered
			// registers need the AVX form.
			if isLowFPReg(r) {
				return x86.AMOVUPS
			} else {
				return x86.AVMOVDQU
			}
		case 32:
			return x86.AVMOVDQU
		case 64:
			return x86.AVMOVDQU64
		}
	}
	if isKReg(r) {
		return x86.AKMOVQ
	}

	switch width {
	case 1:
		return x86.AMOVB
	case 2:
		return x86.AMOVW
	case 4:
		return x86.AMOVL
	case 8:
		return x86.AMOVQ
	}
	panic(fmt.Sprintf("bad store reg=%v, width=%d", r, width))
}

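// moveByRegsWidth returns the register-to-register move instruction
// of the given width for the given pair of registers.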
func moveByRegsWidth(dest, src int16, width int64) obj.As {
	// Moves between two vector registers.
	if isFPReg(dest) && isFPReg(src) {
		// Pick the cheapest encoding that can address both
		// registers: SSE MOVUPS is the shortest but only
		// encodes X0-X15; wider moves need the AVX form,
		// and 64-byte moves need the AVX-512 form.
		if isLowFPReg(dest) && isLowFPReg(src) && width <= 16 {
			return x86.AMOVUPS
		}
		if width <= 32 {
			return x86.AVMOVDQU
		}
		return x86.AVMOVDQU64
	}
	// Moves involving a mask register use KMOVQ; moves between
	// mask and vector registers are not supported.
	if isKReg(dest) || isKReg(src) {
		if isFPReg(dest) || isFPReg(src) {
			panic(fmt.Sprintf("bad move, src=%v, dest=%v, width=%d", src, dest, width))
		}
		return x86.AKMOVQ
	}

	switch width {
	case 1:
		// Use a 32-bit move to avoid a partial register write.
		return x86.AMOVL
	case 2:
		return x86.AMOVL
	case 4:
		return x86.AMOVL
	case 8:
		return x86.AMOVQ
	case 16:
		if isLowFPReg(dest) && isLowFPReg(src) {
			// Prefer the shorter SSE encoding.
			return x86.AMOVUPS
		} else {
			return x86.AVMOVDQU
		}
	case 32:
		return x86.AVMOVDQU
	case 64:
		return x86.AVMOVDQU64
	}
	panic(fmt.Sprintf("bad move, src=%v, dest=%v, width=%d", src, dest, width))
}

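// opregreg emits instructions for
//
//	op dest, src
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).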
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

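// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use ssagen.AddAux/AddAux2 to handle v.Aux as necessary.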
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
	if a.Scale == 1 && i == x86.REG_SP {
		// SP cannot be used as an index register; swap it
		// into the base slot (legal when the scale is 1).
		r, i = i, r
	}
	a.Reg = r
	a.Index = i
}

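// getgFromTLS loads the g pointer from thread-local storage into register r.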
func getgFromTLS(s *ssagen.State, r int16) {
	// See the comments in cmd/internal/obj/x86/obj6.go
	// near CanUse1InsnTLS for a detailed explanation of these instructions.
	if x86.CanUse1InsnTLS(base.Ctxt) {
		// MOVQ (TLS), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	} else {
		// MOVQ TLS, r
		// MOVQ (r)(TLS*1), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		q := s.Prog(x86.AMOVQ)
		q.From.Type = obj.TYPE_MEM
		q.From.Reg = r
		q.From.Index = x86.REG_TLS
		q.From.Scale = 1
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r
	}
}

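// ssaGenValue emits the machine instructions for a single *ssa.Value.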
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64VFMADD231SD, ssa.OpAMD64VFMADD231SS:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.AddRestSourceReg(v.Args[1].Reg())
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
		ssa.OpAMD64POR, ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ,
		ssa.OpAMD64PCMPEQB, ssa.OpAMD64PSIGNB,
		ssa.OpAMD64PUNPCKLBW:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.OpAMD64PSHUFLW:
		p := s.Prog(v.Op.Asm())
		imm := v.AuxInt
		if imm < 0 || imm > 255 {
			v.Fatalf("Invalid source selection immediate")
		}
		p.From.Offset = imm
		p.From.Type = obj.TYPE_CONST
		p.AddRestSourceReg(v.Args[0].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64PSHUFBbroadcast:
		// PSHUFB with a control mask of all zeros copies the low
		// byte of the source to every byte of the destination,
		// i.e. a byte broadcast. X15 holds zero under ABIInternal
		// and serves as that mask here.
		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = x86.REG_X15

	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
		p := s.Prog(v.Op.Asm())
		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = bits
		p.To.Type = obj.TYPE_REG
		p.To.Reg = lo
		p.AddRestSourceReg(hi)

	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
			p.To.Reg = v.Reg0()
		default:
			p.To.Reg = v.Reg()
		}

	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.AddRestSourceReg(v.Args[1].Reg())

	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&m, v)
		p.AddRestSource(m)

	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM}
		memIdx(&m, v)
		ssagen.AddAux(&m, v)
		p.AddRestSource(m)

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		var opCMP, opNEG, opSXD obj.As
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
		case ssa.OpAMD64DIVL:
			opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
		case ssa.OpAMD64DIVW:
			opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
		}

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var j1, j2 *obj.Prog
		if ssa.DivisionNeedsFixUp(v) {
			c := s.Prog(opCMP)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			// Divisor is not -1, proceed with normal division.
			j1 = s.Prog(x86.AJNE)
			j1.To.Type = obj.TYPE_BRANCH

			// Divisor is -1, manually compute quotient and remainder.
			// n / -1 = -n
			n1 := s.Prog(opNEG)
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.

			j2 = s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend and issue the divide.
		p := s.Prog(opSXD)
		if j1 != nil {
			j1.To.SetTarget(p)
		}
		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j2 != nil {
			j2.To.SetTarget(s.Pc())
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}

	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			switch v.AuxInt {
			case 1:
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and make a binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case -1:
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case 0x80:
				// 'SUBQ $-0x80' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80'.
				asm := x86.ASUBL
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ASUBQ
				}
				p := s.Prog(asm)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,TMP
		//   CMOV*NE  DST,TMP
		//   CMOV*PC  TMP,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		t := v.RegTmp()
		opregreg(s, moveByRegsWidth(t, v.Args[1].Reg(), v.Type.Size()), t, v.Args[1].Reg())

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = t
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = t
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.OpAMD64ANDQconst:
		asm := v.Op.Asm()
		// If the constant is positive and fits into 32 bits, use ANDL.
		// This saves a few bytes of encoding.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			asm = x86.AANDL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		o := v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		ssagen.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version as it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[2].Reg()
	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			opregreg(s, x86.AXORL, x, x)
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32bit are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x

	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		if !isFPReg(x) && v.AuxInt == 0 && v.Aux == nil {
			opregreg(s, x86.AXORL, x, x)
			break
		}
		p := s.Prog(storeByRegWidth(x, v.Type.Size()))
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
		ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)

	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVOstoreconst:
		sc := v.AuxValAndOff()
		if sc.Val() != 0 {
			v.Fatalf("MOVO for non-zero constants not implemented: %s", v.LongString())
		}

		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_X15
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())

	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		switch {
		case p.As == x86.AADDQ && p.From.Offset == 1:
			p.As = x86.AINCQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDQ && p.From.Offset == -1:
			p.As = x86.ADECQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == 1:
			p.As = x86.AINCL
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == -1:
			p.As = x86.ADECL
			p.From.Type = obj.TYPE_NONE
		}
		memIdx(&p.To, v)
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS, ssa.OpAMD64VPBROADCASTB, ssa.OpAMD64PMOVMSKB:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
		p := s.Prog(v.Op.Asm())

		r, i := v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_MEM
		p.From.Scale = v.Op.Scale()
		if p.From.Scale == 1 && i == x86.REG_SP {
			r, i = i, r
		}
		p.From.Reg = r
		p.From.Index = i

		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredZero:
		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		ptrReg := v.Args[0].Reg()
		n := v.AuxInt
		if n < 16 {
			v.Fatalf("Zero too small %d", n)
		}
		// zero16 zeroes 16 bytes at ptrReg+off. (Inside the function
		// literal the name still resolves to the outer zero16 helper,
		// not the variable being declared.)
		zero16 := func(off int64) {
			zero16(s, ptrReg, off)
		}

		// Zero in 16-byte chunks.
		var off int64
		for n >= 16 {
			zero16(off)
			off += 16
			n -= 16
		}
		if n != 0 {
			// Zero the final 16 bytes, overlapping the chunk above
			// when n is not a multiple of 16.
			zero16(off + n - 16)
		}

	case ssa.OpAMD64LoweredZeroLoop:
		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		ptrReg := v.Args[0].Reg()
		countReg := v.RegTmp()
		n := v.AuxInt
		loopSize := int64(64)
		if n < 3*loopSize {
			// - a loop count of 0 won't work.
			// - a loop count of 1 is useless.
			// - a loop count of 2 is a code size ~tie
			//     4 instructions to implement the loop
			//     4 instructions in the loop body
			//   vs
			//     8 instructions in the straightline code
			//   Might as well use straightline code.
			v.Fatalf("ZeroLoop size too small %d", n)
		}
		// zero16 zeroes 16 bytes at ptrReg+off.
		zero16 := func(off int64) {
			zero16(s, ptrReg, off)
		}

		// Put the iteration count in a register.
		//   MOVL $(n/loopSize), countReg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = n / loopSize
		p.To.Type = obj.TYPE_REG
		p.To.Reg = countReg
		cntInit := p

		// Loop body: zero loopSize bytes starting at ptrReg.
		for i := range loopSize / 16 {
			zero16(i * 16)
		}
		// Advance the pointer.
		p = s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = loopSize
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ptrReg
		// Decrement the count.
		p = s.Prog(x86.ADECL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = countReg

		// Branch back to the first instruction of the loop body.
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(cntInit.Link)

		// The remaining tail is zeroed straightline.
		n %= loopSize

		// Zero in 16-byte chunks.
		var off int64
		for n >= 16 {
			zero16(off)
			off += 16
			n -= 16
		}
		if n != 0 {
			// Zero the final 16 bytes, overlapping the chunk above
			// when n is not a multiple of 16.
			zero16(off + n - 16)
		}

	case ssa.OpAMD64LoweredMove:
		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()
		if dstReg == srcReg {
			break
		}
		tmpReg := int16(x86.REG_X14)
		n := v.AuxInt
		if n < 16 {
			v.Fatalf("Move too small %d", n)
		}
		// move16 copies 16 bytes from srcReg+off to dstReg+off,
		// using tmpReg as scratch. (Inside the function literal the
		// name still resolves to the outer move16 helper.)
		move16 := func(off int64) {
			move16(s, srcReg, dstReg, tmpReg, off)
		}

		// Copy in 16-byte chunks.
		var off int64
		for n >= 16 {
			move16(off)
			off += 16
			n -= 16
		}
		if n != 0 {
			// Copy the final 16 bytes, overlapping the chunk above
			// when n is not a multiple of 16.
			move16(off + n - 16)
		}

	case ssa.OpAMD64LoweredMoveLoop:
		dstReg := v.Args[0].Reg()
		srcReg := v.Args[1].Reg()
		if dstReg == srcReg {
			break
		}
		countReg := v.RegTmp()
		tmpReg := int16(x86.REG_X14)
		n := v.AuxInt
		loopSize := int64(64)
		if n < 3*loopSize {
			// - a loop count of 0 won't work.
			// - a loop count of 1 is useless.
			// - a loop count of 2 is roughly a code-size tie with
			//   straightline code, so might as well use straightline code.
			v.Fatalf("MoveLoop size too small %d", n)
		}

		// move16 copies 16 bytes from srcReg+off to dstReg+off.
		move16 := func(off int64) {
			move16(s, srcReg, dstReg, tmpReg, off)
		}

		// Put the iteration count in a register.
		//   MOVL $(n/loopSize), countReg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = n / loopSize
		p.To.Type = obj.TYPE_REG
		p.To.Reg = countReg
		cntInit := p

		// Loop body: copy loopSize bytes.
		for i := range loopSize / 16 {
			move16(i * 16)
		}
		// Advance the source pointer.
		p = s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = loopSize
		p.To.Type = obj.TYPE_REG
		p.To.Reg = srcReg
		// Advance the destination pointer.
		p = s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = loopSize
		p.To.Type = obj.TYPE_REG
		p.To.Reg = dstReg
		// Decrement the count.
		p = s.Prog(x86.ADECL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = countReg

		// Branch back to the first instruction of the loop body.
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(cntInit.Link)

		// The remaining tail is copied straightline.
		n %= loopSize

		// Copy in 16-byte chunks.
		var off int64
		for n >= 16 {
			move16(off)
			off += 16
			n -= 16
		}
		if n != 0 {
			// Copy the final 16 bytes, overlapping the chunk above.
			move16(off + n - 16)
		}

	case ssa.OpCopy:
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if v.Type.IsSIMD() {
			x = simdOrMaskReg(v.Args[0])
			y = simdOrMaskReg(v)
		}
		if x != y {
			opregreg(s, moveByRegsWidth(y, x, v.Type.Size()), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		r := v.Reg()
		p := s.Prog(loadByRegWidth(r, v.Type.Size()))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		if v.Type.IsSIMD() {
			r = simdOrMaskReg(v)
		}
		p.To.Reg = r

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		r := v.Args[0].Reg()
		if v.Type.IsSIMD() {
			r = simdOrMaskReg(v.Args[0])
		}
		p := s.Prog(storeByRegWidth(r, v.Type.Size()))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		ssagen.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredHasCPUFeature:
		p := s.Prog(x86.AMOVBLZX)
		p.From.Type = obj.TYPE_MEM
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
		// The loop only runs once.
		for _, ap := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
			reg := ap.Reg
			t := ap.Type
			sz := t.Size()
			if t.IsSIMD() {
				reg = simdRegBySize(reg, sz)
			}
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: reg, Addr: addr, Unspill: loadByRegWidth(reg, sz), Spill: storeByRegWidth(reg, sz)})
		}
		v.Block.Func.RegArgs = nil
		ssagen.CheckArgReg(v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		if s.ABI == obj.ABIInternal {
			v.Fatalf("LoweredGetG should not appear in ABIInternal")
		}
		r := v.Reg()
		getgFromTLS(s, r)
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
			// zeroing X15 when entering ABIInternal from ABI0
			zeroX15(s)
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
		if v.Op == ssa.OpAMD64CALLtail {
			s.TailCall(v)
			break
		}
		s.Call(v)
		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
			// zeroing X15 when returning to ABIInternal from an ABI0 call
			zeroX15(s)
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if types.PtrSize == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.OpAMD64LoweredPanicBoundsRR, ssa.OpAMD64LoweredPanicBoundsRC, ssa.OpAMD64LoweredPanicBoundsCR, ssa.OpAMD64LoweredPanicBoundsCC:
		// Compute the constant we put in the PCData entry for this call.
		code, signed := ssa.BoundsKind(v.AuxInt).Code()
		xIsReg := false
		yIsReg := false
		xVal := 0
		yVal := 0
		switch v.Op {
		case ssa.OpAMD64LoweredPanicBoundsRR:
			xIsReg = true
			xVal = int(v.Args[0].Reg() - x86.REG_AX)
			yIsReg = true
			yVal = int(v.Args[1].Reg() - x86.REG_AX)
		case ssa.OpAMD64LoweredPanicBoundsRC:
			xIsReg = true
			xVal = int(v.Args[0].Reg() - x86.REG_AX)
			c := v.Aux.(ssa.PanicBoundsC).C
			if c >= 0 && c <= abi.BoundsMaxConst {
				yVal = int(c)
			} else {
				// Move constant to a register
				yIsReg = true
				if yVal == xVal {
					yVal = 1
				}
				p := s.Prog(x86.AMOVQ)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = c
				p.To.Type = obj.TYPE_REG
				p.To.Reg = x86.REG_AX + int16(yVal)
			}
		case ssa.OpAMD64LoweredPanicBoundsCR:
			yIsReg = true
			yVal = int(v.Args[0].Reg() - x86.REG_AX)
			c := v.Aux.(ssa.PanicBoundsC).C
			if c >= 0 && c <= abi.BoundsMaxConst {
				xVal = int(c)
			} else {
				// Move constant to a register
				xIsReg = true
				if xVal == yVal {
					xVal = 1
				}
				p := s.Prog(x86.AMOVQ)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = c
				p.To.Type = obj.TYPE_REG
				p.To.Reg = x86.REG_AX + int16(xVal)
			}
		case ssa.OpAMD64LoweredPanicBoundsCC:
			c := v.Aux.(ssa.PanicBoundsCC).Cx
			if c >= 0 && c <= abi.BoundsMaxConst {
				xVal = int(c)
			} else {
				// Move constant to a register
				xIsReg = true
				p := s.Prog(x86.AMOVQ)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = c
				p.To.Type = obj.TYPE_REG
				p.To.Reg = x86.REG_AX + int16(xVal)
			}
			c = v.Aux.(ssa.PanicBoundsCC).Cy
			if c >= 0 && c <= abi.BoundsMaxConst {
				yVal = int(c)
			} else {
				// Move constant to a register
				yIsReg = true
				yVal = 1
				p := s.Prog(x86.AMOVQ)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = c
				p.To.Type = obj.TYPE_REG
				p.To.Reg = x86.REG_AX + int16(yVal)
			}
		}
		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)

		p := s.Prog(obj.APCDATA)
		p.From.SetConst(abi.PCDATA_PanicBounds)
		p.To.SetConst(int64(c))
		p = s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ir.Syms.PanicBounds

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64NEGLflags:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstflags, ssa.OpAMD64ADDLconstflags:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		// Note: the inc/dec instructions do not modify
		// the carry flag like add$1 / sub$1 do.
		// We currently never use the CF/OF flags from
		// these instructions, so that is ok.
		switch {
		case p.As == x86.AADDQ && p.From.Offset == 1:
			p.As = x86.AINCQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDQ && p.From.Offset == -1:
			p.As = x86.ADECQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == 1:
			p.As = x86.AINCL
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == -1:
			p.As = x86.ADECL
			p.From.Type = obj.TYPE_NONE
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64LoweredRound32F, ssa.OpAMD64LoweredRound64F:
		// input is already rounded
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val < 0 || val > 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.AddRestSourceReg(v.Args[0].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel cpus.
			// TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
			// Xor register before it's used to break the dependency.
			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
		ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
		ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
		ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
		ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), t)

	case ssa.OpAMD64SETEQF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), t)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ANDQlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock, ssa.OpAMD64ORQlock:
		// Atomic memory operations that don't need to return the old value.
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64LoweredAtomicAnd64, ssa.OpAMD64LoweredAtomicOr64, ssa.OpAMD64LoweredAtomicAnd32, ssa.OpAMD64LoweredAtomicOr32:
		// Generated code (shown for the 64-bit AND case):
		//
		// loop:
		//	MOVQ mask, tmp
		//	MOVQ (addr), AX
		//	ANDQ AX, tmp
		//	LOCK CMPXCHGQ tmp, (addr)	// CMPXCHG implicitly compares against AX
		//	JNE loop
		mov := x86.AMOVQ
		op := x86.AANDQ
		cmpxchg := x86.ACMPXCHGQ
		switch v.Op {
		case ssa.OpAMD64LoweredAtomicOr64:
			op = x86.AORQ
		case ssa.OpAMD64LoweredAtomicAnd32:
			mov = x86.AMOVL
			op = x86.AANDL
			cmpxchg = x86.ACMPXCHGL
		case ssa.OpAMD64LoweredAtomicOr32:
			mov = x86.AMOVL
			op = x86.AORL
			cmpxchg = x86.ACMPXCHGL
		}
		addr := v.Args[0].Reg()
		mask := v.Args[1].Reg()
		tmp := v.RegTmp()
		p1 := s.Prog(mov)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = mask
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = tmp
		p2 := s.Prog(mov)
		p2.From.Type = obj.TYPE_MEM
		p2.From.Reg = addr
		ssagen.AddAux(&p2.From, v)
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = x86.REG_AX
		p3 := s.Prog(op)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = x86.REG_AX
		p3.To.Type = obj.TYPE_REG
		p3.To.Reg = tmp
		s.Prog(x86.ALOCK)
		p5 := s.Prog(cmpxchg)
		p5.From.Type = obj.TYPE_REG
		p5.From.Reg = tmp
		p5.To.Type = obj.TYPE_MEM
		p5.To.Reg = addr
		ssagen.AddAux(&p5.To, v)
		p6 := s.Prog(x86.AJNE)
		p6.To.Type = obj.TYPE_BRANCH
		p6.To.SetTarget(p1)
	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p.To.Offset += 4
	case ssa.OpClobberReg:
		x := uint64(0xdeaddeaddeaddead)
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(x)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
		s.Prog(v.Op.Asm())

	case ssa.OpAMD64Zero128:
		// Nothing to emit: the result register already holds zero
		// (X15 is kept all-zero under ABIInternal).
	case ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
		// XOR the register with itself to zero it.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = simdReg(v)
		p.AddRestSourceReg(simdReg(v))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)

	case ssa.OpAMD64VMOVSSf2v, ssa.OpAMD64VMOVSDf2v:
		// Move a scalar float into a vector register. Merging with
		// X15 (all zeros) clears the upper lanes of the destination.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.AddRestSourceReg(x86.REG_X15)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)

	case ssa.OpAMD64VMOVQload, ssa.OpAMD64VMOVDload,
		ssa.OpAMD64VMOVSSload, ssa.OpAMD64VMOVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)

	case ssa.OpAMD64VMOVSSconst, ssa.OpAMD64VMOVSDconst:
		// Materialize a floating-point constant in a vector register.
		x := simdReg(v)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x

	case ssa.OpAMD64VMOVD, ssa.OpAMD64VMOVQ:
		// Move from a general-purpose register into a vector register.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)

	case ssa.OpAMD64VMOVDQUload128, ssa.OpAMD64VMOVDQUload256, ssa.OpAMD64VMOVDQUload512,
		ssa.OpAMD64KMOVBload, ssa.OpAMD64KMOVWload, ssa.OpAMD64KMOVDload, ssa.OpAMD64KMOVQload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdOrMaskReg(v)
	case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512,
		ssa.OpAMD64KMOVBstore, ssa.OpAMD64KMOVWstore, ssa.OpAMD64KMOVDstore, ssa.OpAMD64KMOVQstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = simdOrMaskReg(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64VPMASK32load128, ssa.OpAMD64VPMASK64load128, ssa.OpAMD64VPMASK32load256, ssa.OpAMD64VPMASK64load256:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)
		p.AddRestSourceReg(simdReg(v.Args[1]))

	case ssa.OpAMD64VPMASK32store128, ssa.OpAMD64VPMASK64store128, ssa.OpAMD64VPMASK32store256, ssa.OpAMD64VPMASK64store256:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = simdReg(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p.AddRestSourceReg(simdReg(v.Args[1]))

	case ssa.OpAMD64VPMASK64load512, ssa.OpAMD64VPMASK32load512, ssa.OpAMD64VPMASK16load512, ssa.OpAMD64VPMASK8load512:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)
		p.AddRestSourceReg(v.Args[1].Reg())
		x86.ParseSuffix(p, "Z")

	case ssa.OpAMD64VPMASK64store512, ssa.OpAMD64VPMASK32store512, ssa.OpAMD64VPMASK16store512, ssa.OpAMD64VPMASK8store512:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = simdReg(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p.AddRestSourceReg(v.Args[1].Reg())

	case ssa.OpAMD64VPMOVMToVec8x16,
		ssa.OpAMD64VPMOVMToVec8x32,
		ssa.OpAMD64VPMOVMToVec8x64,
		ssa.OpAMD64VPMOVMToVec16x8,
		ssa.OpAMD64VPMOVMToVec16x16,
		ssa.OpAMD64VPMOVMToVec16x32,
		ssa.OpAMD64VPMOVMToVec32x4,
		ssa.OpAMD64VPMOVMToVec32x8,
		ssa.OpAMD64VPMOVMToVec32x16,
		ssa.OpAMD64VPMOVMToVec64x2,
		ssa.OpAMD64VPMOVMToVec64x4,
		ssa.OpAMD64VPMOVMToVec64x8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v)

	case ssa.OpAMD64VPMOVVec8x16ToM,
		ssa.OpAMD64VPMOVVec8x32ToM,
		ssa.OpAMD64VPMOVVec8x64ToM,
		ssa.OpAMD64VPMOVVec16x8ToM,
		ssa.OpAMD64VPMOVVec16x16ToM,
		ssa.OpAMD64VPMOVVec16x32ToM,
		ssa.OpAMD64VPMOVVec32x4ToM,
		ssa.OpAMD64VPMOVVec32x8ToM,
		ssa.OpAMD64VPMOVVec32x16ToM,
		ssa.OpAMD64VPMOVVec64x2ToM,
		ssa.OpAMD64VPMOVVec64x4ToM,
		ssa.OpAMD64VPMOVVec64x8ToM,
		ssa.OpAMD64VPMOVMSKB128,
		ssa.OpAMD64VPMOVMSKB256,
		ssa.OpAMD64VMOVMSKPS128,
		ssa.OpAMD64VMOVMSKPS256,
		ssa.OpAMD64VMOVMSKPD128,
		ssa.OpAMD64VMOVMSKPD256:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = simdReg(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64KMOVQk, ssa.OpAMD64KMOVDk, ssa.OpAMD64KMOVWk, ssa.OpAMD64KMOVBk,
		ssa.OpAMD64KMOVQi, ssa.OpAMD64KMOVDi, ssa.OpAMD64KMOVWi, ssa.OpAMD64KMOVBi:
		// Moves between mask (K) registers and general-purpose registers.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64VPTEST:
		// VPTEST sets ZF/CF from its two vector inputs.
		// Both operands are read; the flags are the only result.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = simdReg(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = simdReg(v.Args[1])

	default:
		if !ssaGenSIMDValue(s, v) {
			v.Fatalf("genValue not implemented: %s", v.LongString())
		}
	}
}

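// zeroX15 zeroes register X15, which ABIInternal code expects to hold zero.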
func zeroX15(s *ssagen.State) {
	opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}

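// simdV11 emits an instruction with one vector input and one vector output:
// v = op arg0.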
func simdV11(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[0])
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

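// simdV21 emits an instruction with two vector inputs and one vector output:
// v = arg0 op arg1.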
func simdV21(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	// Vector register operands follow a right-to-left order,
	// e.g. VPSUBD X1, X2, X3 means X3 = X2 - X1.
	p.From.Reg = simdReg(v.Args[1])
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

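// simdVfpv is like simdV21, except that the second operand uses its
// allocated register directly instead of a resized vector register.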
func simdVfpv(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	// The second operand is used as allocated, without
	// mapping it to a sized SIMD register.
	p.From.Reg = v.Args[1].Reg()
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

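// simdV2k emits a comparison-style instruction with two vector inputs
// and a mask (K) register output.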
func simdV2k(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[1])
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

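// simdV2kv emits a predicated instruction with two vector inputs,
// a mask input, and a vector output.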
func simdV2kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[1])
	p.AddRestSourceReg(simdReg(v.Args[0]))
	// These "simd*" functions assume that any K register that
	// serves as the write mask (the "predicate" of a predicated
	// AVX-512 instruction) sits right at the end of the operand
	// list.
	// TODO: verify this assumption.
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

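// simdV2kvResultInArg0 is like simdV2kv, but the destination register
// must be the same as arg 0, so arg 0 is not emitted as a separate source.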
func simdV2kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[1])
	// The write-mask K register sits at the end of the operand
	// list (see simdV2kv). Arg 0 is both an input and the
	// destination, so it is not emitted as a separate source.
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

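// simdVfpkv is like simdVfpv with an additional mask input.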
func simdVfpkv(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = v.Args[1].Reg()
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

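// simdV2kk emits a predicated comparison with two vector inputs,
// a mask input, and a mask output.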
func simdV2kk(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[1])
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

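// simdVkv emits a predicated instruction with one vector input,
// a mask input, and a vector output.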
func simdVkv(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[0])
	p.AddRestSourceReg(maskReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

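// simdV11Imm8 emits a SIMD instruction with an 8-bit immediate, one vector
// input, and one vector output, e.g. VPSHUFD $1, X1, X2.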
func simdV11Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

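// simdVkvImm8 is like simdV11Imm8, with an additional mask (K) input.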
func simdVkvImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

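// simdV21Imm8 emits a SIMD instruction with an 8-bit immediate, two vector
// inputs, and one vector output, e.g. VSHUFPS $0, X1, X2, X3.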
func simdV21Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

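// simdVgpvImm8 emits a SIMD instruction with an 8-bit immediate, a GP
// register input, and a vector input, producing a vector,
// e.g. VPINSRB $0, AX, X1, X2.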
func simdVgpvImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(v.Args[1].Reg())
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

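// simdV2kImm8 emits a SIMD comparison with an 8-bit immediate, two vector
// inputs, and a mask output, e.g. VPCMPD $1, Z1, Z2, K1.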
func simdV2kImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

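// simdV2kkImm8 is like simdV2kImm8, with an additional mask input,
// e.g. VPCMPD $1, Z1, Z2, K2, K1.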
func simdV2kkImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

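// simdV2kvImm8 emits a masked SIMD instruction with an 8-bit immediate,
// two vector inputs, a mask input, and a vector output.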
func simdV2kvImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

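// simdV31ResultInArg0 emits a three-vector-input instruction (e.g. an FMA)
// whose result is allocated to Args[0]'s register; only Args[2] and Args[1]
// are encoded.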
func simdV31ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[2])
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

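// simdV31ResultInArg0Imm8 is like simdV31ResultInArg0, with a leading
// 8-bit immediate.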
func simdV31ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	// Args[0] is not encoded: it shares the destination register.
	p.AddRestSourceReg(simdReg(v.Args[2]))
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

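// simdV31loadResultInArg0Imm8 is like simdV31ResultInArg0Imm8, but the
// third input comes from memory: the address is in Args[2], and the aux
// packs both the immediate and the memory offset.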
func simdV31loadResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[2].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v) // the result shares Args[0]'s register
	return p
}

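// simdV3kvResultInArg0 emits a masked three-vector-input instruction whose
// result is allocated to Args[0]'s register.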
func simdV3kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[2])
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(maskReg(v.Args[3]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

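// simdVgpImm8 emits a SIMD instruction with an 8-bit immediate and one
// vector input, producing a GP-register result, e.g. VPEXTRB $0, X1, AX.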
func simdVgpImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = v.Reg()
	return p
}

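// simdV31 emits a SIMD instruction with three vector inputs and one vector
// output.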
func simdV31(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[2])
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

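// simdV3kv emits a masked SIMD instruction with three vector inputs, a
// mask input, and a vector output.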
func simdV3kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[2])
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[3]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

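// simdVkvload emits a masked vector load: a memory input (address in
// Args[0]), a mask input, and a vector output.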
func simdVkvload(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[0].Reg()
	ssagen.AddAux(&p.From, v)
	p.AddRestSourceReg(maskReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

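// simdV21load is like simdV21, but the second input comes from memory
// (address in Args[1]).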
func simdV21load(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[1].Reg()
	ssagen.AddAux(&p.From, v)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

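// simdV31loadResultInArg0 is like simdV31ResultInArg0, but the third input
// comes from memory (address in Args[2]).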
func simdV31loadResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[2].Reg()
	ssagen.AddAux(&p.From, v)
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

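// simdV3kvloadResultInArg0 is like simdV3kvResultInArg0, but the third
// input comes from memory (address in Args[2]).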
func simdV3kvloadResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[2].Reg()
	ssagen.AddAux(&p.From, v)
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.AddRestSourceReg(maskReg(v.Args[3]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

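// simdV2kvload is like simdV2kv, but the second input comes from memory
// (address in Args[1]).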
func simdV2kvload(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[1].Reg()
	ssagen.AddAux(&p.From, v)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

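// simdV2kload is like simdV2k, but the second input comes from memory
// (address in Args[1]).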
func simdV2kload(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[1].Reg()
	ssagen.AddAux(&p.From, v)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

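// simdV11load emits a plain vector load: a memory input (address in
// Args[0]) to a vector output.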
func simdV11load(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = v.Args[0].Reg()
	ssagen.AddAux(&p.From, v)
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

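// simdV11loadImm8 is like simdV11Imm8, but the input comes from memory;
// the immediate and the memory offset are packed in the aux (ValAndOff).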
func simdV11loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

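// simdVkvloadImm8 is like simdVkvImm8, but the vector input comes from
// memory (address in Args[0]).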
func simdVkvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.AddRestSourceReg(maskReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

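// simdV21loadImm8 is like simdV21Imm8, but the second input comes from
// memory (address in Args[1]).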
func simdV21loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

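// simdV2kloadImm8 is like simdV2kImm8, but the second input comes from
// memory (address in Args[1]).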
func simdV2kloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

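// simdV2kkloadImm8 is like simdV2kkImm8, but the second input comes from
// memory (address in Args[1]).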
func simdV2kkloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = maskReg(v)
	return p
}

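// simdV2kvloadImm8 is like simdV2kvImm8, but the second input comes from
// memory (address in Args[1]).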
func simdV2kvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	sc := v.AuxValAndOff()
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = sc.Val64()
	m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
	ssagen.AddAux2(&m, v, sc.Off64())
	p.AddRestSource(m)
	p.AddRestSourceReg(simdReg(v.Args[0]))
	p.AddRestSourceReg(maskReg(v.Args[2]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

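// simdV21ResultInArg0 emits a two-vector-input instruction whose result is
// allocated to Args[0]'s register; only Args[1] is encoded.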
func simdV21ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_REG
	p.From.Reg = simdReg(v.Args[1])
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

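// simdV21ResultInArg0Imm8 is like simdV21ResultInArg0, with a leading
// 8-bit immediate.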
func simdV21ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Offset = int64(v.AuxUInt8())
	p.From.Type = obj.TYPE_CONST
	p.AddRestSourceReg(simdReg(v.Args[1]))
	p.To.Type = obj.TYPE_REG
	p.To.Reg = simdReg(v)
	return p
}

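// simdV31x0AtIn2ResultInArg0 is for instructions whose third input is
// constrained by register allocation to X0 (an implicit operand); since the
// assembler still takes that operand explicitly, the emission is the same
// as simdV31ResultInArg0.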
func simdV31x0AtIn2ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	return simdV31ResultInArg0(s, v)
}

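// blockJump gives, for each conditional block kind, the jump that branches
// to Succs[0] and the inverted jump that branches to Succs[1].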
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

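// eqfJumps and nefJumps are the two-branch sequences for floating-point
// equality and inequality: the parity flag must be consulted as well,
// because comparisons involving NaN set it.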
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

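// ssaGenBlock generates the control-flow instructions that end block b.
// next is the block laid out immediately after b, so a jump to it can be
// elided.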
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockDefer:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockAMD64EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	case ssa.BlockAMD64JUMPTABLE:
		// JMP      *(TABLE)(INDEX*8)
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = b.Controls[1].Reg()
		p.To.Index = b.Controls[0].Reg()
		p.To.Scale = 8

		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}

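// loadRegResult generates a load of result n's stack slot (at frame offset
// off) into reg.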
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByRegWidth(reg, t.Size()))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

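// spillArgReg generates a store of argument register reg to parameter n's
// stack slot, marked as not a statement.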
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByRegWidth(reg, t.Size()), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}

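// zero16 zeroes the 16 bytes at off(reg), storing from X15, which always
// holds zero.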
func zero16(s *ssagen.State, reg int16, off int64) {
	// MOVUPS X15, off(reg)
	p := s.Prog(x86.AMOVUPS)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_X15
	p.To.Type = obj.TYPE_MEM
	p.To.Reg = reg
	p.To.Offset = off
}

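// move16 copies the 16 bytes at off(src) to off(dst), clobbering the X
// register tmp.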
func move16(s *ssagen.State, src, dst, tmp int16, off int64) {
	// MOVUPS off(src), tmp
	// MOVUPS tmp, off(dst)
	p := s.Prog(x86.AMOVUPS)
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = src
	p.From.Offset = off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = tmp
	p = s.Prog(x86.AMOVUPS)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = tmp
	p.To.Type = obj.TYPE_MEM
	p.To.Reg = dst
	p.To.Offset = off
}

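// simdReg returns the register for SIMD value v, renumbered into the X, Y,
// or Z bank to match the value's width.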
func simdReg(v *ssa.Value) int16 {
	t := v.Type
	if !t.IsSIMD() {
		base.Fatalf("simdReg: not a simd type; v=%s, b=b%d, f=%s", v.LongString(), v.Block.ID, v.Block.Func.Name)
	}
	return simdRegBySize(v.Reg(), t.Size())
}

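// simdRegBySize renumbers an allocated X register into the X, Y, or Z bank
// according to size.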
func simdRegBySize(reg int16, size int64) int16 {
	switch size {
	case 16:
		return reg
	case 32:
		return reg + (x86.REG_Y0 - x86.REG_X0)
	case 64:
		return reg + (x86.REG_Z0 - x86.REG_X0)
	}
	panic("simdRegBySize: bad size")
}

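// maskReg returns the mask (K) register for SIMD value v; mask values are
// 8 bytes wide.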
func maskReg(v *ssa.Value) int16 {
	t := v.Type
	if !t.IsSIMD() {
		base.Fatalf("maskReg: not a simd type; v=%s, b=b%d, f=%s", v.LongString(), v.Block.ID, v.Block.Func.Name)
	}
	switch t.Size() {
	case 8:
		return v.Reg()
	}
	panic("maskReg: bad size")
}

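// simdOrMaskReg returns the mask register for mask-sized (<= 8 byte) SIMD
// values and the vector register otherwise.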
func simdOrMaskReg(v *ssa.Value) int16 {
	t := v.Type
	if t.Size() <= 8 {
		return maskReg(v)
	}
	return simdReg(v)
}

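// simdCheckRegOnly checks that v was allocated a register in
// [regStart, regEnd] and returns it; it is meant for ops whose instruction
// requires a fixed register range.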
func simdCheckRegOnly(v *ssa.Value, regStart, regEnd int16) int16 {
	if v.Reg() > regEnd || v.Reg() < regStart {
		panic("simdCheckRegOnly: not the desired register")
	}
	return v.Reg()
}