// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

// Register usage (z13 convention):
// R2 = rp (result pointer, z)
// R3 = ap (source pointer, x)
// R4 = an on entry (limb count), then idx (negative-biased byte offset
//      applied to both z and x, advanced by 32 per loop iteration)
// R5 = b0 (multiplier limb, y)
// R6 = cy (carry limb; alternates with R8 between half-iterations)
// R10 = number of four-limb groups (n >> 2), the BRCTG loop counter
// R0:R1, R6:R7, R8:R9 = even/odd pairs receiving 128-bit MLGR products
//      (high half in the even register, low half in the odd register)
// V1 = two limbs of z, V6/V7 = product addends, V3/V5 = partial sums
// V0, V2 = carry-outs of the two 128-bit add chains

#include "textflag.h"

// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Sets the limb count to 16 (1024 bits / 64) and jumps to the shared body.
TEXT ·addMulVVW1024(SB), $0-32
	MOVD $16, R4
	JMP addMulVVWx(SB)

// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Sets the limb count to 24 (1536 bits / 64) and jumps to the shared body.
TEXT ·addMulVVW1536(SB), $0-32
	MOVD $24, R4
	JMP addMulVVWx(SB)

// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Sets the limb count to 32 (2048 bits / 64) and jumps to the shared body.
TEXT ·addMulVVW2048(SB), $0-32
	MOVD $32, R4
	JMP addMulVVWx(SB)

// addMulVVWx is the shared body of the addMulVVW* entry points above.
// It is reached by JMP with the limb count n already in R4, so z, x, y and
// c refer to the arguments of whichever entry point jumped here.
//
// For each of the n 64-bit limbs it multiplies x[i] by y, adds the product
// into z[i] with carry propagation, and finally stores the carry-out limb
// in c.
//
// The code first peels n mod 4 limbs (labels L_b01, L_b10, L_b11, L_b00)
// and then runs a loop that handles four limbs per iteration in two halves
// (L_top and L_mid). Each half is software-pipelined: it issues the loads
// and multiplies for the next pair of limbs while adding and storing the
// previous pair.
//
// NOTE(review): the entry points in this file only pass 16, 24 or 32, all
// multiples of four, so they always enter through L_b00; the other paths
// appear to be kept for generality.
TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0
	MOVD z+0(FP), R2
	MOVD x+8(FP), R3
	MOVD y+16(FP), R5

	// No carry limb on entry.
	MOVD $0, R6

L_ent:
	// Clear both carry accumulators.
	VZERO V0
	VZERO V2
	// R10 = n / 4.
	SRD $2, R4, R10
	// Dispatch on bit 0 of n: mask 8 branches when the tested bit is zero.
	TMLL R4, $1
	BRC $8, L_bx0

L_bx1:
	// n is odd: one limb is handled on its own. Load z[0] into the low
	// doubleword of the (zeroed) V2.
	VLEG $1, 0(R2), V2
	VZERO V4
	// Dispatch on bit 1 of n: mask 7 branches when the tested bit is set.
	TMLL R4, $2
	BRC $7, L_b11

L_b01:
	// n mod 4 == 1. Bias the index so that 32(R4) addresses limb 1.
	MOVD $-24, R4
	MOVD R6, R0
	MOVD 0(R3), R7
	// R6:R7 = x[0] * y.
	MLGR R5, R6
	// Add the incoming carry limb to the low half and propagate into the
	// high half.
	ADDC R0, R7
	MOVD $0, R0
	ADDE R0, R6
	// z[0] += low half, as a 128-bit add so the carry lands in the high
	// doubleword of V2.
	VLVGG $1, R7, V4
	VAQ V2, V4, V2
	VSTEG $1, V2, 0(R2)
	// Copy the high doubleword (the carry) into both halves of V2.
	VMRHG V2, V2, V2
	// n == 1: nothing left but the final carry.
	CMPBEQ R10, $0, L_1
	BR L_cj0

L_b11:
	// n mod 4 == 3. Bias the index so that 16(R4) addresses limb 1.
	MOVD $-8, R4
	MOVD 0(R3), R9
	// R8:R9 = x[0] * y; the carry limb moves to R8 for L_cj1.
	MLGR R5, R8
	ADDC R6, R9
	MOVD $0, R6
	ADDE R6, R8
	// z[0] += low half; the carry ends up in V2 as in L_b01.
	VLVGG $1, R9, V4
	VAQ V2, V4, V2
	VSTEG $1, V2, 0(R2)
	VMRHG V2, V2, V2
	BR L_cj1

L_bx0:
	// n is even: dispatch on bit 1 of n.
	TMLL R4, $2
	BRC $7, L_b10

L_b00:
	// n mod 4 == 0. Bias the index so that 32(R4) addresses limb 0.
	MOVD $-32, R4

L_cj0:
	// Prime the pipeline for L_mid: multiply the next two limbs of x.
	MOVD 32(R3)(R4), R1
	MOVD 40(R3)(R4), R9
	// R0:R1 = first limb * y, R8:R9 = second limb * y.
	MLGR R5, R0
	MLGR R5, R8
	// Load the matching two limbs of z and swap the doublewords so the
	// lower-addressed limb becomes the low half of the 128-bit value.
	VL 32(R4)(R2), V1
	VPDI $4, V1, V1, V1
	// V6 = hi(first product) : lo(first product).
	VLVGP R0, R1, V6
	// V7 = lo(second product) : carry limb from the previous pair.
	// R8 now holds the carry limb for the next pair.
	VLVGP R9, R6, V7
	BR L_mid

L_b10:
	// n mod 4 == 2. Bias the index so that 16(R4) addresses limb 0, and
	// move the carry limb into R8, where L_cj1 expects it.
	MOVD $-16, R4
	MOVD R6, R8

L_cj1:
	// Prime the pipeline for L_top (or L_end): same as L_cj0, but with the
	// roles of the carry registers R6 and R8 exchanged.
	MOVD 16(R4)(R3), R1
	MOVD 24(R4)(R3), R7
	MLGR R5, R0
	MLGR R5, R6
	VL 16(R4)(R2), V1
	VPDI $4, V1, V1, V1
	VLVGP R0, R1, V6
	VLVGP R7, R8, V7
	// No four-limb groups remain: just flush the pending pair.
	CMPBEQ R10, $0, L_end

L_top:
	// First half of the loop body: start the multiplies for the next pair.
	MOVD 32(R4)(R3), R1
	MOVD 40(R4)(R3), R9
	MLGR R5, R0
	MLGR R5, R8
	// Finish the pending pair: V3 = V6 + V1 + V7, with the carry-in and
	// carry-out of the first add kept in V0 and of the second in V2.
	VACQ V6, V1, V0, V5
	VACCCQ V6, V1, V0, V0
	VACQ V5, V7, V2, V3
	VACCCQ V5, V7, V2, V2
	// Swap back to memory order. The store is issued after the load of
	// the next z pair.
	VPDI $4, V3, V3, V3
	VL 32(R4)(R2), V1
	VPDI $4, V1, V1, V1
	VST V3, 16(R4)(R2)
	VLVGP R0, R1, V6
	VLVGP R9, R6, V7

L_mid:
	// Second half of the loop body: identical to L_top, 16 bytes further
	// on and with R6 and R8 exchanged.
	MOVD 48(R4)(R3), R1
	MOVD 56(R4)(R3), R7
	MLGR R5, R0
	MLGR R5, R6
	VACQ V6, V1, V0, V5
	VACCCQ V6, V1, V0, V0
	VACQ V5, V7, V2, V3
	VACCCQ V5, V7, V2, V2
	VPDI $4, V3, V3, V3
	VL 48(R4)(R2), V1
	VPDI $4, V1, V1, V1
	VST V3, 32(R4)(R2)
	VLVGP R0, R1, V6
	VLVGP R7, R8, V7
	// Advance the byte index by four limbs, then decrement R10 and loop
	// while it is non-zero.
	MOVD $32(R4), R4
	BRCTG R10, L_top

L_end:
	// Flush the last pending pair of limbs.
	VACQ V6, V1, V0, V5
	VACCCQ V6, V1, V0, V0
	VACQ V5, V7, V2, V3
	VACCCQ V5, V7, V2, V2
	VPDI $4, V3, V3, V3
	VST V3, 16(R2)(R4)
	// Combine the carry-outs of the two add chains.
	VAG V0, V2, V2

L_1:
	// c = accumulated vector carry + final high product limb (R6).
	VLGVG $1, V2, R2
	ADDC R6, R2
	MOVD R2, c+24(FP)
	RET