1 // Copyright 2019 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "asm_riscv64.h"
6 #include "go_asm.h"
7 #include "textflag.h"
8
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	// Register arguments on entry:
	//   X10 = b_base
	//   X11 = b_len
	//   X12 = b_cap (ignored; the register is reused for the target byte)
	//   X13 = byte to find
	// Result:
	//   X10 = offset of the first matching byte from b_base, or -1 if none
	AND	$0xff, X13, X12		// X12 = target byte with the upper bits cleared

	// Inputs of 24 bytes or more are handed to indexByteBig; anything
	// shorter is scanned one byte at a time below.
	SLTI	$24, X11, X14		// X14 = 1 when b_len < 24, otherwise 0
	BEQZ	X14, big

	MOV	X10, X13		// X13 = b_base, kept to turn the pointer into an index
	ADD	X10, X11		// X11 = address one past the last byte
	BEQ	X10, X11, notfound	// nothing to scan

scan:
	MOVBU	(X10), X14
	BEQ	X12, X14, found
	ADD	$1, X10
	BNE	X10, X11, scan

notfound:
	MOV	$-1, X10
	RET

found:
	SUB	X13, X10		// match address - b_base = index
	RET

big:
	// Tail call: X10, X11 and X12 already hold what indexByteBig expects.
	JMP	indexByteBig<>(SB)
36
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	// Register arguments on entry:
	//   X10 = base address of the data to search
	//   X11 = length in bytes
	//   X12 = byte to find
	// Result:
	//   X10 = offset of the first matching byte from the base, or -1 if none
	AND	$0xff, X12, X12		// keep only the low 8 bits of the target byte

	// Inputs of 24 bytes or more are handed to indexByteBig; anything
	// shorter is scanned one byte at a time below.
	SLTI	$24, X11, X14		// X14 = 1 when the length is < 24, otherwise 0
	BEQZ	X14, big

	MOV	X10, X13		// X13 = base, kept to turn the pointer into an index
	ADD	X10, X11		// X11 = address one past the last byte
	BEQ	X10, X11, notfound	// nothing to scan

scan:
	MOVBU	(X10), X14
	BEQ	X12, X14, found
	ADD	$1, X10
	BNE	X10, X11, scan

notfound:
	MOV	$-1, X10
	RET

found:
	SUB	X13, X10		// match address - base = index
	RET

big:
	// Tail call: X10, X11 and X12 already hold what indexByteBig expects.
	JMP	indexByteBig<>(SB)
63
TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0
	// Worker that the callers in this file jump to for longer inputs.
	//
	// On entry:
	//   X10 = base address of the data
	//   X11 = length in bytes (must be at least 16)
	//   X12 = byte to find (the callers in this file mask it to 8 bits)
	// On exit:
	//   X10 = index of the first occurrence of the byte, or -1 if absent

	MOV	X10, X13		// X13 = base, needed to turn a pointer into an index

#ifndef hasV
	// hasV is not defined at assembly time, so read the CPU feature flag
	// at run time and use the scalar code when it is zero.
	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5
	BEQZ	X5, scalar
#endif

	PCALIGN	$16
vloop:
	VSETVLI	X11, E8, M8, TA, MA, X5	// X5 = number of bytes handled this pass
	VLE8V	(X10), V8		// load that many bytes
	VMSEQVX	X12, V8, V0		// V0 = mask of elements equal to X12
	VFIRSTM	V0, X6			// X6 = position of the first match, negative if none
	BGEZ	X6, vhit
	ADD	X5, X10			// advance the pointer
	SUB	X5, X11			// shrink the remaining count
	BNEZ	X11, vloop
	MOV	$-1, X10		// all bytes consumed without a match
	RET

vhit:
	ADD	X6, X10			// address of the matching byte
	SUB	X13, X10		// minus base = index
	RET

scalar:
	ADD	X10, X11		// X11 = address one past the last byte

	// Scan byte by byte until the pointer is 8-byte aligned. At most
	// 7 bytes are consumed here and the buffer holds at least 16, so
	// this loop does not need an end-of-buffer check.
head:
	AND	$7, X10, X14
	BEQZ	X14, words
	MOVBU	(X10), X14
	BEQ	X12, X14, found
	ADD	$1, X10
	JMP	head

words:
	// At least 9 bytes remain, so at least one whole word is examined.
	//
	// The word test is the 'Determine if a word has a byte equal to n'
	// bit hack from
	// https://graphics.stanford.edu/~seander/bithacks.html
	// XORing a loaded word with the broadcast target turns every matching
	// byte into zero, and
	//
	//	((X19 - X16) & (~X19) & X17)
	//
	// is non-zero when some byte of X19 is zero and zero when none is.

	MOV	$0x0101010101010101, X16	// X16 = 0x01 in every byte
	SLLI	$7, X16, X17			// X17 = 0x8080808080808080
	MUL	X12, X16, X18			// X18 = X12 broadcast to every byte
	AND	$~7, X11, X15			// X15 = end of the aligned data

wordloop:
	MOV	(X10), X14
	XOR	X14, X18, X19		// a zero byte in X19 marks a match
	SUB	X16, X19, X20		// X20 = X19 - X16
	ANDN	X19, X17, X21		// X21 = X17 & ~X19
	AND	X20, X21		// X21 = (X19 - X16) & ~X19 & X17
	BNEZ	X21, tail		// target byte is somewhere in this word
	ADD	$8, X10
	BNE	X10, X15, wordloop

	// Reached either with X10 at a word known to contain the byte, or
	// with X10 at the end of the aligned data and only trailing bytes
	// (possibly none) left. Finish with a byte-wise scan.
tail:
	BEQ	X10, X11, notfound
tailscan:
	MOVBU	(X10), X14
	BEQ	X12, X14, found
	ADD	$1, X10
	BNE	X10, X11, tailscan

notfound:
	MOV	$-1, X10
	RET

found:
	SUB	X13, X10		// match address - base = index
	RET
162
View as plain text