// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "asm_riscv64.h"
#include "go_asm.h"
#include "textflag.h"

#define CTXT S10
// func memequal_varlen(a, b unsafe.Pointer) bool
//
// Closure-called variant of memequal: the two base pointers arrive in
// X10/X11 per ABIInternal, but the comparison length is not an argument —
// the compiler stores it at offset 8 in the closure context (CTXT, i.e. S10).
// We load it into X12 and tail-jump into memequal, which produces the
// boolean result in X10.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
	// X10 = a_base
	// X11 = b_base
	MOV	8(CTXT), X12	// compiler stores size at offset 8 in the closure
	JMP	runtime·memequal<ABIInternal>(SB)

// func memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Input (ABIInternal):
//   X10 = a_base
//   X11 = b_base
//   X12 = size in bytes
// Output:
//   X10 = 1 if the two regions are byte-for-byte equal, 0 otherwise.
//
// Invariant: X12 always holds the number of bytes not yet verified equal,
// so reaching "done" with X12 == 0 means the regions matched and the
// result is SEQZ(X12).
//
// Strategy: identical pointers are trivially equal. For size >= 32 the
// vector extension is used when available (unconditionally when built
// with hasV, otherwise gated on the internal/cpu HasV probe); the scalar
// path aligns to 8 bytes, then compares 32 and 16 bytes at a time,
// finishing the tail with 4- and 1-byte loops.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// X10 = a_base
	// X11 = b_base
	// X12 = size
	BNE	X10, X11, length_check
	MOV	$0, X12			// same pointer: nothing left to compare

length_check:
	BEQZ	X12, done

	MOV	$32, X23
	BLT	X12, X23, loop4_check	// < 32 bytes: go straight to small loops

#ifndef hasV
	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5
	BEQZ	X5, equal_scalar	// no V extension at run time
#endif

	// Use vector if not 8 byte aligned.
	OR	X10, X11, X5
	AND	$7, X5
	BNEZ	X5, vector_loop

	// Use scalar if 8 byte aligned and <= 64 bytes.
	SUB	$64, X12, X6
	BLEZ	X6, loop32_check

	PCALIGN	$16
vector_loop:
	VSETVLI	X12, E8, M8, TA, MA, X5	// X5 = bytes processed this iteration
	VLE8V	(X10), V8
	VLE8V	(X11), V16
	VMSNEVV	V8, V16, V0		// V0 mask bits set where bytes differ
	VFIRSTM	V0, X6			// X6 = index of first set bit, or -1 if none
	BGEZ	X6, done		// mismatch found; X12 > 0 here so result is 0
	ADD	X5, X10
	ADD	X5, X11
	SUB	X5, X12
	BNEZ	X12, vector_loop
	JMP	done

equal_scalar:
	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X9
	AND	$7, X11, X19
	BNE	X9, X19, loop4_check
	BEQZ	X9, loop32_check

	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X9, X0, X9		// X9 = -(addr & 7)
	ADD	$8, X9, X9		// X9 = 8 - (addr & 7) bytes until aligned
	SUB	X9, X12, X12		// account for the alignment bytes up front
align:
	SUB	$1, X9
	MOVBU	0(X10), X19
	MOVBU	0(X11), X20
	BNE	X19, X20, done
	ADD	$1, X10
	ADD	$1, X11
	BNEZ	X9, align

loop32_check:
	// Both pointers 8-byte aligned: compare 32 bytes per iteration.
	MOV	$32, X9
	BLT	X12, X9, loop16_check
loop32:
	MOV	0(X10), X19
	MOV	0(X11), X20
	MOV	8(X10), X21
	MOV	8(X11), X22
	BNE	X19, X20, done
	BNE	X21, X22, done
	MOV	16(X10), X14
	MOV	16(X11), X15
	MOV	24(X10), X16
	MOV	24(X11), X17
	BNE	X14, X15, done
	BNE	X16, X17, done
	ADD	$32, X10
	ADD	$32, X11
	SUB	$32, X12
	BGE	X12, X9, loop32
	BEQZ	X12, done

loop16_check:
	MOV	$16, X23
	BLT	X12, X23, loop4_check
loop16:
	MOV	0(X10), X19
	MOV	0(X11), X20
	MOV	8(X10), X21
	MOV	8(X11), X22
	BNE	X19, X20, done
	BNE	X21, X22, done
	ADD	$16, X10
	ADD	$16, X11
	SUB	$16, X12
	BGE	X12, X23, loop16
	BEQZ	X12, done

loop4_check:
	// Tail / unaligned path: 4 bytes per iteration, byte loads.
	MOV	$4, X23
	BLT	X12, X23, loop1
loop4:
	MOVBU	0(X10), X19
	MOVBU	0(X11), X20
	MOVBU	1(X10), X21
	MOVBU	1(X11), X22
	BNE	X19, X20, done
	BNE	X21, X22, done
	MOVBU	2(X10), X14
	MOVBU	2(X11), X15
	MOVBU	3(X10), X16
	MOVBU	3(X11), X17
	BNE	X14, X15, done
	BNE	X16, X17, done
	ADD	$4, X10
	ADD	$4, X11
	SUB	$4, X12
	BGE	X12, X23, loop4

loop1:
	// Remaining 0-3 bytes, one at a time.
	BEQZ	X12, done
	MOVBU	0(X10), X19
	MOVBU	0(X11), X20
	BNE	X19, X20, done
	ADD	$1, X10
	ADD	$1, X11
	SUB	$1, X12
	JMP	loop1

done:
	// If X12 is zero then memory is equivalent.
	SEQZ	X12, X10		// X10 = (X12 == 0) ? 1 : 0
	RET