Text file src/internal/bytealg/indexbyte_riscv64.s

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "asm_riscv64.h"
     6  #include "go_asm.h"
     7  #include "textflag.h"
     8  
     9  TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
        	// Returns in X10 the offset of the first byte equal to the low 8 bits
        	// of X13 among the X11 bytes starting at X10, or -1 if there is none.
        	// Lengths below 24 are scanned one byte at a time here; anything
        	// longer is handed to indexByteBig.
    10  	// X10 = b_base
    11  	// X11 = b_len
    12  	// X12 = b_cap (unused)
    13  	// X13 = byte to find
    14  	AND	$0xff, X13, X12		// x12 byte to look for
    15  
    16  	SLTI	$24, X11, X14		// X14 = 1 if b_len < 24 (signed), else 0
    17  	BNEZ	X14, small
        	// Tail jump (not a call): indexByteBig reads X10/X11/X12 as set up
        	// above and its RET returns directly to our caller.
    18  	JMP	indexByteBig<>(SB)
    19  
    20  small:
    21  	MOV	X10, X13		// store base for later
    22  	ADD	X10, X11		// end
    23  	SUB	$1, X10			// step back one; the loop increments before it tests
    24  loop:
    25  	ADD	$1, X10
    26  	BEQ	X10, X11, notfound	// reached end; taken immediately when b_len == 0
    27  	MOVBU	(X10), X14		// X14 = current byte, zero-extended
    28  	BNE	X12, X14, loop
    29  
    30  	SUB	X13, X10		// remove base
    31  	RET
    32  
    33  notfound:
    34  	MOV	$-1, X10
    35  	RET
    36  
    37  TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
        	// Returns in X10 the offset of the first byte equal to the low 8 bits
        	// of X12 among the X11 bytes starting at X10, or -1 if there is none.
        	// Same algorithm as IndexByte; the only difference visible here is
        	// that the byte to find arrives in X12 instead of X13.
    38  	// X10 = b_base
    39  	// X11 = b_len
    40  	// X12 = byte to find
    41  	AND	$0xff, X12		// x12 byte to look for
    42  
    43  	SLTI	$24, X11, X14		// X14 = 1 if b_len < 24 (signed), else 0
    44  	BNEZ	X14, small
        	// Tail jump (not a call): indexByteBig reads X10/X11/X12 as set up
        	// above and its RET returns directly to our caller.
    45  	JMP	indexByteBig<>(SB)
    46  
    47  small:
    48  	MOV	X10, X13		// store base for later
    49  	ADD	X10, X11		// end
    50  	SUB	$1, X10			// step back one; the loop increments before it tests
    51  loop:
    52  	ADD	$1, X10
    53  	BEQ	X10, X11, notfound	// reached end; taken immediately when b_len == 0
    54  	MOVBU	(X10), X14		// X14 = current byte, zero-extended
    55  	BNE	X12, X14, loop
    56  
    57  	SUB	X13, X10		// remove base
    58  	RET
    59  
    60  notfound:
    61  	MOV	$-1, X10
    62  	RET
    63  
    64  TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0
        	// Shared search routine for longer inputs. It is entered by JMP
        	// from IndexByte and IndexByteString, so the RETs below return to
        	// their caller. Uses the vector path when available, otherwise an
        	// 8-bytes-at-a-time scalar path.
    65  	// On entry:
    66  	// X10 = b_base
    67  	// X11 = b_len (at least 16 bytes)
        	//       (both callers in this file only jump here when b_len >= 24)
    68  	// X12 = byte to find
    69  	// On exit:
    70  	// X10 = index of first instance of sought byte, if found, or -1 otherwise
    71  
    72  	MOV	X10, X13		// store base for later
    73  
        	// When hasV is not defined at assembly time, consult the runtime CPU
        	// feature flag and fall back to the scalar path if it is zero.
        	// NOTE(review): hasV is presumably provided by asm_riscv64.h when the
        	// build target guarantees the V extension - confirm.
    74  #ifndef hasV
    75  	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5
    76  	BEQZ	X5, indexbyte_scalar
    77  #endif
    78  
        	// Vector path. In this loop X10 is the current position and X11 the
        	// number of bytes still to examine.
    79  	PCALIGN	$16
    80  vector_loop:
        	// X5 = vl, the number of 8-bit elements handled this iteration
        	// (at most X11); LMUL=8, tail- and mask-agnostic.
    81  	VSETVLI	X11, E8, M8, TA, MA, X5
    82  	VLE8V	(X10), V8		// load vl bytes into the V8 register group
    83  	VMSEQVX	X12, V8, V0		// V0 mask bit i set where byte i == X12
    84  	VFIRSTM	V0, X6			// X6 = index of first set mask bit, or -1 if none
    85  	BGEZ	X6, vector_found
    86  	ADD	X5, X10			// advance past the bytes just checked
    87  	SUB	X5, X11			// remaining -= vl
    88  	BNEZ	X11, vector_loop
    89  	JMP	notfound
    90  
    91  vector_found:
    92  	SUB	X13, X10		// offset of this chunk from the base
    93  	ADD	X6, X10			// plus position of the match inside the chunk
    94  	RET
    95  
    96  indexbyte_scalar:
    97  	ADD	X10, X11		// end
    98  
    99  	// Process the first few bytes until we get to an 8 byte boundary
   100  	// No need to check for end here as we have at least 16 bytes in
   101  	// the buffer.
   102  
   103  unalignedloop:
   104  	AND	$7, X10, X14		// X14 = X10 mod 8
   105  	BEQZ	X14, aligned
   106  	MOVBU	(X10), X14
   107  	BEQ	X12, X14, found
   108  	ADD	$1, X10
   109  	JMP	unalignedloop
   110  
   111  aligned:
   112  	AND	$~7, X11, X15		// X15 = end of aligned data
   113  
   114  	// We have at least 9 bytes left
   115  
   116  	// Use 'Determine if a word has a byte equal to n' bit hack from
   117  	// https://graphics.stanford.edu/~seander/bithacks.html to determine
   118  	// whether the byte is present somewhere in the next 8 bytes of the
   119  	// array.
   120  
   121  	MOV	$0x0101010101010101, X16
   122  	SLLI	$7, X16, X17		// X17 = 0x8080808080808080
   123  
   124  	MUL	X12, X16, X18		// broadcast X12 to every byte in X18
   125  
   126  alignedloop:
   127  	MOV	(X10), X14		// 8-byte load; X10 is 8-byte aligned here
   128  	XOR	X14, X18, X19
   129  
   130  	// If the LSB in X12 is present somewhere in the 8 bytes we've just
   131  	// loaded into X14 then at least one of the bytes in X19 will be 0
   132  	// after the XOR.  If any of the bytes in X19 are zero then
   133  	//
   134  	// ((X19 - X16) & (~X19) & X17)
   135  	//
   136  	// will be non-zero.  The expression will evaluate to zero if none of
   137  	// the bytes in X19 are zero, i.e., X12 is not present in X14.
   138  
   139  	SUB	X16, X19, X20		// X20 = X19 - X16
   140  	ANDN	X19, X17, X21		// X21 = X17 & ~X19
   141  	AND	X20, X21		// X21 = (X19 - X16) & ~X19 & X17
   142  	BNEZ	X21, tailloop		// If X21 != 0 X12 is present in X14
   143  	ADD	$8, X10
   144  	BNE	X10, X15, alignedloop
   145  
        	// Reached either with X10 at the start of a word that contains the
        	// byte, or with X10 == X15 and fewer than 8 bytes left before X11.
        	// Both cases are finished by the byte-at-a-time loop below.
   146  tailloop:
   147  	SUB	$1, X10			// step back one; the loop increments before it tests
   148  
   149  loop:
   150  	ADD	$1, X10
   151  	BEQ	X10, X11, notfound
   152  	MOVBU	(X10), X14
   153  	BNE	X12, X14, loop
   154  
   155  found:
   156  	SUB	X13, X10		// remove base
   157  	RET
   158  
   159  notfound:
   160  	MOV	$-1, X10
   161  	RET
   162  

View as plain text