Text file src/runtime/memclr_s390x.s

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // See memclrNoHeapPointers Go doc for important implementation constraints.
     8  
     9  // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
        //
        // Zeroes the n bytes starting at ptr.
        //
        // Register roles inside this routine:
        //   R4 = address of the next byte still to be cleared
        //   R5 = number of bytes still to be cleared
        //   V1 = all-zero vector used by the 16-byte VST stores (32..255 byte paths)
        //
        // Dispatch overview:
        //   n < 32      scalar stores, one exact store sequence per size (start)
        //   32..255     vector stores: a head block plus a tail block addressed
        //               from the end of the buffer (the two may overlap)
        //   >= 256      256-byte XC instructions, unrolled 16x while >= 4KB remain,
        //               then the remainder goes through the paths above
    10  TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-16
    11  #ifndef GOEXPERIMENT_regabiargs
    12  	MOVD	ptr+0(FP), R4	// stack-passed arguments: R4 = ptr
    13  	MOVD	n+8(FP), R5	// stack-passed arguments: R5 = n
    14  #else
    15  	MOVD	R2, R4	// register-passed arguments: ptr arrives in R2
    16  	MOVD	R3, R5	// register-passed arguments: n arrives in R3
    17  #endif
    18  	CMPBGE	R5, $32, clearge32	// n >= 32: use the XC / vector paths
    19  
    20  start:	// reached only with R5 < 32, so the 16-byte step below runs at most once
    21  	CMPBLE	R5, $3, clear0to3
    22  	CMPBLE	R5, $7, clear4to7
    23  	CMPBLE	R5, $11, clear8to11
    24  	CMPBLE	R5, $15, clear12to15
    25  	MOVD	$0, 0(R4)	// 16 <= R5 <= 31: clear 16 bytes with two 8-byte stores
    26  	MOVD	$0, 8(R4)
    27  	ADD	$16, R4
    28  	SUB	$16, R5
    29  	BR	start	// 0..15 bytes remain; dispatch again
    30  
    31  clear0to3:
    32  	CMPBEQ	R5, $0, done	// nothing left to clear
    33  	CMPBNE	R5, $1, clear2
    34  	MOVB	$0, 0(R4)	// 1 byte
    35  	RET
    36  clear2:
    37  	CMPBNE	R5, $2, clear3
    38  	MOVH	$0, 0(R4)	// 2 bytes
    39  	RET
    40  clear3:
    41  	MOVH	$0, 0(R4)	// 3 bytes = 2 + 1
    42  	MOVB	$0, 2(R4)
    43  	RET
    44  
    45  clear4to7:
    46  	CMPBNE	R5, $4, clear5
    47  	MOVW	$0, 0(R4)	// 4 bytes
    48  	RET
    49  clear5:
    50  	CMPBNE	R5, $5, clear6
    51  	MOVW	$0, 0(R4)	// 5 bytes = 4 + 1
    52  	MOVB	$0, 4(R4)
    53  	RET
    54  clear6:
    55  	CMPBNE	R5, $6, clear7
    56  	MOVW	$0, 0(R4)	// 6 bytes = 4 + 2
    57  	MOVH	$0, 4(R4)
    58  	RET
    59  clear7:
    60  	MOVW	$0, 0(R4)	// 7 bytes = 4 + 2 + 1
    61  	MOVH	$0, 4(R4)
    62  	MOVB	$0, 6(R4)
    63  	RET
    64  
    65  clear8to11:
    66  	CMPBNE	R5, $8, clear9
    67  	MOVD	$0, 0(R4)	// 8 bytes
    68  	RET
    69  clear9:
    70  	CMPBNE	R5, $9, clear10
    71  	MOVD	$0, 0(R4)	// 9 bytes = 8 + 1
    72  	MOVB	$0, 8(R4)
    73  	RET
    74  clear10:
    75  	CMPBNE	R5, $10, clear11
    76  	MOVD	$0, 0(R4)	// 10 bytes = 8 + 2
    77  	MOVH	$0, 8(R4)
    78  	RET
    79  clear11:
    80  	MOVD	$0, 0(R4)	// 11 bytes = 8 + 2 + 1
    81  	MOVH	$0, 8(R4)
    82  	MOVB	$0, 10(R4)
    83  	RET
    84  
    85  clear12to15:
    86  	CMPBNE	R5, $12, clear13
    87  	MOVD	$0, 0(R4)	// 12 bytes = 8 + 4
    88  	MOVW	$0, 8(R4)
    89  	RET
    90  clear13:
    91  	CMPBNE	R5, $13, clear14
    92  	MOVD	$0, 0(R4)	// 13 bytes = 8 + 4 + 1
    93  	MOVW	$0, 8(R4)
    94  	MOVB	$0, 12(R4)
    95  	RET
    96  clear14:
    97  	CMPBNE	R5, $14, clear15
    98  	MOVD	$0, 0(R4)	// 14 bytes = 8 + 4 + 2
    99  	MOVW	$0, 8(R4)
   100  	MOVH	$0, 12(R4)
   101  	RET
   102  clear15:
   103  	MOVD	$0, 0(R4)	// 15 bytes = 8 + 4 + 2 + 1
   104  	MOVW	$0, 8(R4)
   105  	MOVH	$0, 12(R4)
   106  	MOVB	$0, 14(R4)
   107  	RET
   108  
   109  clearge32:
   110  	CMP	R5, $4096
   111  	BLT	clear256Bto4KB	// fewer than 4KB: skip the unrolled loop
   112  
   113  // For sizes >= 4KB, the XC loop is unrolled 16 times (4KB = 256B * 16).
        // Each XC XORs a 256-byte region of memory with itself, which leaves it zero.
   114  clearge4KB:
   115  	XC	$256, 0(R4), 0(R4)
   116  	XC	$256, 256(R4), 256(R4)
   117  	XC	$256, 512(R4), 512(R4)
   118  	XC	$256, 768(R4), 768(R4)
   119  	XC	$256, 1024(R4), 1024(R4)
   120  	XC	$256, 1280(R4), 1280(R4)
   121  	XC	$256, 1536(R4), 1536(R4)
   122  	XC	$256, 1792(R4), 1792(R4)
   123  	XC	$256, 2048(R4), 2048(R4)
   124  	XC	$256, 2304(R4), 2304(R4)
   125  	XC	$256, 2560(R4), 2560(R4)
   126  	XC	$256, 2816(R4), 2816(R4)
   127  	XC	$256, 3072(R4), 3072(R4)
   128  	XC	$256, 3328(R4), 3328(R4)
   129  	XC	$256, 3584(R4), 3584(R4)
   130  	XC	$256, 3840(R4), 3840(R4)
   131  	ADD	$4096, R4	// advance past the 4KB just cleared
   132  	ADD	$-4096, R5
   133  	CMP	R5, $4096
   134  	BGE	clearge4KB	// repeat while at least 4KB remain
   135  
   136  clear256Bto4KB:	// clear 256 bytes per iteration while R5 >= 256
   137  	CMP	R5, $256
   138  	BLT	clear32to255
   139  	XC	$256, 0(R4), 0(R4)
   140  	ADD	$256, R4
   141  	ADD	$-256, R5
   142  	BR	clear256Bto4KB
   143  
   144  clear32to255:	// R5 < 256 here: only reached via the BLT in clear256Bto4KB
   145  	CMPBEQ	R5, $0, done	// the XC loops cleared everything
   146  	CMPBLT	R5, $32, start	// 1..31 bytes left: finish on the scalar path
   147  	CMPBEQ	R5, $32, clear32
   148  	CMPBLE	R5, $64, clear33to64
   149  	CMP	R5, $128
   150  	BLE	clear65to128
   151  	CMP	R5, $255
   152  	BLE	clear129to255	// always taken because R5 < 256; the fall-through into clear32 is not reached
   153  
   154  clear32:
   155  	VZERO	V1	// V1 = 0
   156  	VST	V1, 0(R4)	// two 16-byte vector stores = 32 bytes
   157  	VST	V1, 16(R4)
   158  	RET
   159  
   160  clear33to64:	// first 32 bytes + last 32 bytes; the two ranges may overlap
   161  	VZERO	V1
   162  	VST	V1, 0(R4)
   163  	VST	V1, 16(R4)
   164  	ADD	$-32, R5	// R5 = offset from R4 of the last 32 bytes
   165  	VST	V1, 0(R4)(R5)	// address = R4 + R5 + displacement
   166  	VST	V1, 16(R4)(R5)
   167  	RET
   168  
   169  clear65to128:	// first 64 bytes + last 64 bytes; the two ranges may overlap
   170  	VZERO	V1
   171  	VST	V1, 0(R4)
   172  	VST	V1, 16(R4)
   173  	VST	V1, 32(R4)
   174  	VST	V1, 48(R4)
   175  	ADD	$-64, R5	// R5 = offset from R4 of the last 64 bytes
   176  	VST	V1, 0(R4)(R5)
   177  	VST	V1, 16(R4)(R5)
   178  	VST	V1, 32(R4)(R5)
   179  	VST	V1, 48(R4)(R5)
   180  	RET
   181  
   182  clear129to255:	// first 128 bytes + last 128 bytes; the two ranges overlap
   183  	VZERO	V1
   184  	VST	V1, 0(R4)
   185  	VST	V1, 16(R4)
   186  	VST	V1, 32(R4)
   187  	VST	V1, 48(R4)
   188  	VST	V1, 64(R4)
   189  	VST	V1, 80(R4)
   190  	VST	V1, 96(R4)
   191  	VST	V1, 112(R4)
   192  	ADD	$-128, R5	// R5 = offset from R4 of the last 128 bytes
   193  	VST	V1, 0(R4)(R5)
   194  	VST	V1, 16(R4)(R5)
   195  	VST	V1, 32(R4)(R5)
   196  	VST	V1, 48(R4)(R5)
   197  	VST	V1, 64(R4)(R5)
   198  	VST	V1, 80(R4)(R5)
   199  	VST	V1, 96(R4)(R5)
   200  	VST	V1, 112(R4)(R5)
   201  	RET
   202  
   203  done:	// shared exit for the "zero bytes remaining" cases
   204  	RET
   205  
   206  

View as plain text