Text file src/internal/bytealg/equal_riscv64.s

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "asm_riscv64.h"
     6  #include "go_asm.h"
     7  #include "textflag.h"
     8  
     9  #define	CTXT	S10
    10  
    11  // func memequal_varlen(a, b unsafe.Pointer) bool
        // Closure-called variant of memequal: the byte count is not passed in a
        // register but stored in the closure context (CTXT = S10, see #define above).
    12  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
    13  	// X10 = a_base
    14  	// X11 = b_base
    15  	MOV	8(CTXT), X12    // compiler stores size at offset 8 in the closure
    16  	JMP	runtime·memequal<ABIInternal>(SB)	// tail call: X10/X11/X12 now match memequal's ABIInternal args; result returned in X10
    17  
    18  // func memequal(a, b unsafe.Pointer, size uintptr) bool
        // ABIInternal: a in X10, b in X11, size in X12; boolean result returned in X10.
        // Invariant used throughout: every "not equal" branch jumps to done while
        // X12 (bytes remaining) is still non-zero; X12 reaches zero only when all
        // bytes matched. done then converts that into the boolean result.
    19  TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    20  	// X10 = a_base
    21  	// X11 = b_base
    22  	// X12 = size (bytes remaining to compare)
    23  	BNE	X10, X11, length_check	// identical pointers => contents trivially equal
    24  	MOV	$0, X12			// zero remaining count so the BEQZ below reports equal
    25  
    26  length_check:
    27  	BEQZ	X12, done	// nothing left to compare => equal
    28  
    29  	MOV	$32, X23
    30  	BLT	X12, X23, loop4_check	// sizes < 32 go straight to the small scalar loops
    31  
    32  #ifndef hasV
        	// V extension not guaranteed at build time: test the CPU feature flag at run time.
    33  	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5
    34  	BEQZ	X5, equal_scalar
    35  #endif
    36  
    37  	// Use vector if not 8 byte aligned.
    38  	OR	X10, X11, X5
    39  	AND	$7, X5
    40  	BNEZ	X5, vector_loop
    41  
    42  	// Use scalar if 8 byte aligned and <= 64 bytes.
    43  	SUB	$64, X12, X6
    44  	BLEZ	X6, loop32_check
    45  
    46  	PCALIGN	$16
    47  vector_loop:
    48  	VSETVLI	X12, E8, M8, TA, MA, X5	// X5 = vl = number of bytes handled this pass (8-bit elements, LMUL=8)
    49  	VLE8V	(X10), V8
    50  	VLE8V	(X11), V16
    51  	VMSNEVV	V8, V16, V0	// mask bit set where bytes differ
    52  	VFIRSTM	V0, X6		// X6 = index of first set mask bit, or -1 if none
    53  	BGEZ	X6, done	// mismatch found; X12 != 0 here, so done returns false
    54  	ADD	X5, X10
    55  	ADD	X5, X11
    56  	SUB	X5, X12
    57  	BNEZ	X12, vector_loop
    58  	JMP	done		// X12 == 0: every byte matched
    59  
    60  equal_scalar:
    61  	// Check alignment - if alignment differs we have to do one byte at a time.
    62  	AND	$7, X10, X9
    63  	AND	$7, X11, X19
    64  	BNE	X9, X19, loop4_check
    65  	BEQZ	X9, loop32_check	// both already 8-byte aligned
    66  
    67  	// Check one byte at a time until we reach 8 byte alignment.
    68  	SUB	X9, X0, X9	// X9 = -offset
    69  	ADD	$8, X9, X9	// X9 = 8 - offset = bytes needed to reach alignment (1..7)
    70  	SUB	X9, X12, X12	// deduct alignment bytes up front (safe: size >= 32 on this path)
    71  align:
    72  	SUB	$1, X9
    73  	MOVBU	0(X10), X19
    74  	MOVBU	0(X11), X20
    75  	BNE	X19, X20, done	// X12 != 0 here, so done reports not equal
    76  	ADD	$1, X10
    77  	ADD	$1, X11
    78  	BNEZ	X9, align
    79  
    80  loop32_check:
        	// Compare 32 bytes per iteration via four pairs of 8-byte loads.
    81  	MOV	$32, X9
    82  	BLT	X12, X9, loop16_check
    83  loop32:
    84  	MOV	0(X10), X19
    85  	MOV	0(X11), X20
    86  	MOV	8(X10), X21
    87  	MOV	8(X11), X22
    88  	BNE	X19, X20, done
    89  	BNE	X21, X22, done
    90  	MOV	16(X10), X14
    91  	MOV	16(X11), X15
    92  	MOV	24(X10), X16
    93  	MOV	24(X11), X17
    94  	BNE	X14, X15, done
    95  	BNE	X16, X17, done
    96  	ADD	$32, X10
    97  	ADD	$32, X11
    98  	SUB	$32, X12
    99  	BGE	X12, X9, loop32
   100  	BEQZ	X12, done
   101  
   102  loop16_check:
        	// 16..31 bytes remain: one 16-byte step, then fall through for the tail.
   103  	MOV	$16, X23
   104  	BLT	X12, X23, loop4_check
   105  loop16:
   106  	MOV	0(X10), X19
   107  	MOV	0(X11), X20
   108  	MOV	8(X10), X21
   109  	MOV	8(X11), X22
   110  	BNE	X19, X20, done
   111  	BNE	X21, X22, done
   112  	ADD	$16, X10
   113  	ADD	$16, X11
   114  	SUB	$16, X12
   115  	BGE	X12, X23, loop16
   116  	BEQZ	X12, done
   117  
   118  loop4_check:
        	// Byte-wise compare, 4 bytes per iteration; also the entry point for
        	// small sizes and for pointers with mismatched alignment.
   119  	MOV	$4, X23
   120  	BLT	X12, X23, loop1
   121  loop4:
   122  	MOVBU	0(X10), X19
   123  	MOVBU	0(X11), X20
   124  	MOVBU	1(X10), X21
   125  	MOVBU	1(X11), X22
   126  	BNE	X19, X20, done
   127  	BNE	X21, X22, done
   128  	MOVBU	2(X10), X14
   129  	MOVBU	2(X11), X15
   130  	MOVBU	3(X10), X16
   131  	MOVBU	3(X11), X17
   132  	BNE	X14, X15, done
   133  	BNE	X16, X17, done
   134  	ADD	$4, X10
   135  	ADD	$4, X11
   136  	SUB	$4, X12
   137  	BGE	X12, X23, loop4
   138  
   139  loop1:
        	// Final tail: 0..3 bytes, one at a time.
   140  	BEQZ	X12, done
   141  	MOVBU	0(X10), X19
   142  	MOVBU	0(X11), X20
   143  	BNE	X19, X20, done
   144  	ADD	$1, X10
   145  	ADD	$1, X11
   146  	SUB	$1, X12
   147  	JMP	loop1
   148  
   149  done:
   150  	// If X12 is zero then memory is equivalent.
   151  	SEQZ	X12, X10	// result (X10) = (X12 == 0)
   152  	RET
   153  

View as plain text