Text file src/crypto/internal/fips140/bigmod/nat_s390x.s

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  // Register usage (z13 convention):
     8  // R2 = rp (result pointer)
     9  // R3 = ap (source pointer)
    10  // R4 = an / idx (limb count on entry, then byte index; the loop counter is R10)
    11  // R5 = b0 (multiplier limb)
    12  // R6 = cy (carry)
    13  
    14  #include "textflag.h"
    15  
    16  // func addMulVVW1024(z, x *uint, y uint) (c uint)
    17  TEXT ·addMulVVW1024(SB), $0-32	// z += x*y over 16 64-bit limbs (1024 bits); c receives the carry-out limb
    18  	MOVD	$16, R4	// R4 = limb count consumed by addMulVVWx
    19  	JMP	addMulVVWx(SB)	// tail-jump: the shared body reads z, x, y via FP and writes c itself
    20  
    21  // func addMulVVW1536(z, x *uint, y uint) (c uint)
    22  TEXT ·addMulVVW1536(SB), $0-32	// z += x*y over 24 64-bit limbs (1536 bits); c receives the carry-out limb
    23  	MOVD	$24, R4	// R4 = limb count consumed by addMulVVWx
    24  	JMP	addMulVVWx(SB)	// tail-jump: the shared body reads z, x, y via FP and writes c itself
    25  
    26  // func addMulVVW2048(z, x *uint, y uint) (c uint)
    27  TEXT ·addMulVVW2048(SB), $0-32	// z += x*y over 32 64-bit limbs (2048 bits); c receives the carry-out limb
    28  	MOVD	$32, R4	// R4 = limb count consumed by addMulVVWx
    29  	JMP	addMulVVWx(SB)	// tail-jump: the shared body reads z, x, y via FP and writes c itself
    30  
    31  TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0	// shared body: z[0:n] += x[0:n]*y with n = R4 limbs; writes the carry-out limb to c
    32  	MOVD z+0(FP), R2	// R2 = z (read and updated in place)
    33  	MOVD x+8(FP), R3	// R3 = x (read only)
    34  	MOVD y+16(FP), R5	// R5 = y, the single multiplier limb
    35  
    36  	MOVD	$0, R6	// R6 = carry-in limb, initially zero
    37  
    38  L_ent:	// dispatch on n mod 4 (bits 0 and 1 of R4)
    39  	VZERO	V0	// V0 = carry of the first 128-bit add chain
    40  	VZERO	V2	// V2 = carry of the second 128-bit add chain
    41  	SRD	$2, R4, R10	// R10 = n / 4, the count decremented by BRCTG below
    42  	TMLL	R4, $1	// test bit 0 of n
    43  	BRC     $8, L_bx0	// mask 8 = CC0: bit clear, n is even
    44  
    45  L_bx1:	// n is odd: z[0], x[0] are handled on their own first
    46  	VLEG	$1, 0(R2), V2	// low doubleword of V2 = z[0] (high doubleword is still zero)
    47  	VZERO	V4
    48  	TMLL	R4, $2	// test bit 1 of n
    49  	BRC     $7, L_b11	// mask 7 = CC1-3: bit set, n mod 4 == 3
    50  
    51  L_b01:	// n mod 4 == 1
    52  	MOVD	$-24, R4	// bias the byte index so that 32(R4) addresses limb 1
    53  	MOVD	R6, R0	// R0 = carry-in (MLGR below overwrites R6)
    54  	MOVD	0(R3), R7	// R7 = x[0]
    55  	MLGR	R5, R6	// R6:R7 = x[0] * y (128-bit product, high half in R6)
    56  	ADDC	R0, R7	// low half += carry-in
    57  	MOVD	$0, R0
    58  	ADDE	R0, R6	// propagate the carry of that add into the high half
    59  	VLVGG	$1, R7, V4	// low doubleword of V4 = low half of the product
    60  	VAQ	V2, V4, V2	// 128-bit add: V2 = z[0] + low half
    61  	VSTEG	$1, V2, 0(R2)	// z[0] = low doubleword of the sum
    62  	VMRHG	V2, V2, V2	// copy the high doubleword (carry out of the add) into the low doubleword
    63  	CMPBEQ	R10, $0, L_1	// n == 1: nothing left, go produce the result
    64  	BR	L_cj0
    65  
    66  L_b11:	// n mod 4 == 3
    67  	MOVD	$-8, R4	// bias the byte index so that 16(R4) addresses limb 1
    68  	MOVD	0(R3), R9	// R9 = x[0]
    69  	MLGR	R5, R8	// R8:R9 = x[0] * y (high half in R8)
    70  	ADDC	R6, R9	// low half += carry-in
    71  	MOVD	$0, R6
    72  	ADDE	R6, R8	// propagate the carry; R8 is the carry limb L_cj1 consumes
    73  	VLVGG	$1, R9, V4	// low doubleword of V4 = low half of the product
    74  	VAQ	V2, V4, V2	// 128-bit add: V2 = z[0] + low half
    75  	VSTEG	$1, V2, 0(R2)	// z[0] = low doubleword of the sum
    76  	VMRHG	V2, V2, V2	// copy the high doubleword (carry out of the add) into the low doubleword
    77  	BR	L_cj1
    78  
    79  L_bx0:	// n is even
    80  	TMLL	R4, $2	// test bit 1 of n
    81  	BRC	$7, L_b10	// bit set: n mod 4 == 2
    82  
    83  L_b00:	// n mod 4 == 0
    84  	MOVD	$-32, R4	// bias the byte index so that 32(R4) addresses limb 0
    85  
    86  L_cj0:	// prime the pipeline: carry limb arrives in R6 and leaves in R8
    87  	MOVD	32(R3)(R4), R1	// R1, R9 = next two limbs of x
    88  	MOVD	40(R3)(R4), R9
    89  	MLGR	R5, R0	// R0:R1 = first limb * y
    90  	MLGR	R5, R8	// R8:R9 = second limb * y
    91  	VL	32(R4)(R2), V1	// V1 = the matching two limbs of z
    92  	VPDI	$4, V1, V1, V1	// swap doublewords: the lower-addressed limb becomes the low-order half
    93  	VLVGP	R0, R1, V6	// V6 = R0:R1, the whole first product
    94  	VLVGP	R9, R6, V7	// V7 = R9:R6 = low half of the second product : incoming carry limb
    95  	BR	L_mid	// join the loop at its second half
    96  
    97  L_b10:	// n mod 4 == 2
    98  	MOVD	$-16, R4	// bias the byte index so that 16(R4) addresses limb 0
    99  	MOVD	R6, R8	// L_cj1 expects the carry limb in R8
   100  
   101  L_cj1:	// prime the pipeline: carry limb arrives in R8 and leaves in R6
   102  	MOVD	16(R4)(R3), R1	// R1, R7 = next two limbs of x
   103  	MOVD	24(R4)(R3), R7
   104  	MLGR	R5, R0	// R0:R1 = first limb * y
   105  	MLGR	R5, R6	// R6:R7 = second limb * y
   106  	VL	16(R4)(R2), V1	// V1 = the matching two limbs of z
   107  	VPDI	$4, V1, V1, V1	// swap doublewords: the lower-addressed limb becomes the low-order half
   108  	VLVGP	R0, R1, V6	// V6 = R0:R1, the whole first product
   109  	VLVGP	R7, R8, V7	// V7 = R7:R8 = low half of the second product : incoming carry limb
   110  	CMPBEQ	R10, $0, L_end	// no 4-limb groups: only the final add and store remain
   111  
   112  L_top:	// first half of a pass: multiply the next pair while adding/storing the previous one
   113  	MOVD	32(R4)(R3), R1	// R1, R9 = next two limbs of x
   114  	MOVD	40(R4)(R3), R9
   115  	MLGR	R5, R0	// R0:R1 = first limb * y
   116  	MLGR R5, R8	// R8:R9 = second limb * y
   117  	VACQ	V6, V1, V0, V5	// V5 = V6 + V1 + carry held in V0
   118  	VACCCQ	V6, V1, V0, V0	// V0 = carry out of that sum (must follow the VACQ that reads V0)
   119  	VACQ	V5, V7, V2, V3	// V3 = V5 + V7 + carry held in V2
   120  	VACCCQ	V5, V7, V2, V2	// V2 = carry out of that sum
   121  	VPDI	$4, V3, V3, V3	// swap doublewords back to memory order
   122  	VL	32(R4)(R2), V1	// V1 = the z limbs matching the products just computed
   123  	VPDI	$4, V1, V1, V1	// swap doublewords: the lower-addressed limb becomes the low-order half
   124  	VST	V3, 16(R4)(R2)	// store the finished pair of z limbs
   125  	VLVGP	R0, R1, V6	// V6 = R0:R1, the whole first product
   126  	VLVGP	R9, R6, V7	// V7 = R9:R6 = low half of the second product : carry limb from the previous half
   127  
   128  L_mid:	// second half of a pass: same steps with the R6/R8 roles exchanged
   129  	MOVD	48(R4)(R3), R1	// R1, R7 = next two limbs of x
   130  	MOVD	56(R4)(R3), R7
   131  	MLGR	R5, R0	// R0:R1 = first limb * y
   132  	MLGR	R5, R6	// R6:R7 = second limb * y
   133  	VACQ	V6, V1, V0, V5	// V5 = V6 + V1 + carry held in V0
   134  	VACCCQ	V6, V1, V0, V0	// V0 = carry out of that sum
   135  	VACQ	V5, V7, V2, V3	// V3 = V5 + V7 + carry held in V2
   136  	VACCCQ	V5, V7, V2, V2	// V2 = carry out of that sum
   137  	VPDI	$4, V3, V3, V3	// swap doublewords back to memory order
   138  	VL	48(R4)(R2), V1	// V1 = the z limbs matching the products just computed
   139  	VPDI	$4, V1, V1, V1	// swap doublewords: the lower-addressed limb becomes the low-order half
   140  	VST	V3, 32(R4)(R2)	// store the finished pair of z limbs
   141  	VLVGP	R0, R1, V6	// V6 = R0:R1, the whole first product
   142  	VLVGP	R7, R8, V7	// V7 = R7:R8 = low half of the second product : carry limb from the previous half
   143  	MOVD	$32(R4), R4	// advance the byte index by four limbs
   144  	BRCTG	R10, L_top	// decrement R10 and loop while it is non-zero
   145  
   146  L_end:	// drain the pipeline: add and store the last pending pair
   147  	VACQ	V6, V1, V0, V5	// V5 = V6 + V1 + carry held in V0
   148  	VACCCQ	V6, V1, V0, V0	// V0 = carry out of that sum
   149  	VACQ	V5, V7, V2, V3	// V3 = V5 + V7 + carry held in V2
   150  	VACCCQ	V5, V7, V2, V2	// V2 = carry out of that sum
   151  	VPDI	$4, V3, V3, V3	// swap doublewords back to memory order
   152  	VST	V3, 16(R2)(R4)	// store the last pair of z limbs
   153  	VAG	V0, V2, V2	// V2 = sum of the carries left in both chains
   154  
   155  L_1:	// result = carries accumulated in V2 + high half of the last product (R6)
   156  	VLGVG	$1, V2, R2	// R2 = low doubleword of V2 (z pointer is no longer needed)
   157  	ADDC	R6, R2	// add the pending high-half limb
   158  	MOVD	R2, c+24(FP)	// return value c
   159  	RET
   160  
   161  

View as plain text