// Source file src/runtime/mkpreempt.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  // mkpreempt generates the asyncPreempt functions for each
     8  // architecture.
     9  package main
    10  
    11  import (
    12  	"bytes"
    13  	"flag"
    14  	"fmt"
    15  	"go/format"
    16  	"io"
    17  	"log"
    18  	"os"
    19  	"strings"
    20  )
    21  
    22  // Copied from cmd/compile/internal/ssa/gen/*Ops.go
    23  
// regNames386 lists the 386 registers considered for saving: the eight
// GPRs (SP is filtered out by gen386) followed by SSE registers X0-X7.
var regNames386 = []string{
	"AX",
	"CX",
	"DX",
	"BX",
	"SP",
	"BP",
	"SI",
	"DI",
	"X0",
	"X1",
	"X2",
	"X3",
	"X4",
	"X5",
	"X6",
	"X7",
}
    42  
// regNamesAMD64 lists the amd64 registers considered for saving: sixteen
// GPRs (SP and BP are filtered out by genAMD64) followed by X0-X15
// (genAMD64 builds the X/Y/Z layouts itself, skipping these entries).
var regNamesAMD64 = []string{
	"AX",
	"CX",
	"DX",
	"BX",
	"SP",
	"BP",
	"SI",
	"DI",
	"R8",
	"R9",
	"R10",
	"R11",
	"R12",
	"R13",
	"R14",
	"R15",
	"X0",
	"X1",
	"X2",
	"X3",
	"X4",
	"X5",
	"X6",
	"X7",
	"X8",
	"X9",
	"X10",
	"X11",
	"X12",
	"X13",
	"X14",
	"X15",
}
    77  
// arches maps each architecture name to the generator that emits its
// asyncPreempt assembly.
var arches = map[string]func(g *gen){
	"386":     gen386,
	"amd64":   genAMD64,
	"arm":     genARM,
	"arm64":   genARM64,
	"loong64": genLoong64,
	"mips64x": func(g *gen) { genMIPS(g, true) },
	"mipsx":   func(g *gen) { genMIPS(g, false) },
	"ppc64x":  genPPC64,
	"riscv64": genRISCV64,
	"s390x":   genS390X,
	"wasm":    genWasm,
}

// beLe marks architectures that exist in both big- and little-endian
// variants; commonHeader emits a build tag covering both for them.
var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
    92  
    93  func main() {
    94  	flag.Parse()
    95  	if flag.NArg() > 0 {
    96  		for _, arch := range flag.Args() {
    97  			genFn, ok := arches[arch]
    98  			if !ok {
    99  				log.Fatalf("unknown arch %s", arch)
   100  			}
   101  			g := gen{os.Stdout, arch}
   102  			g.asmHeader()
   103  			genFn(&g)
   104  		}
   105  		return
   106  	}
   107  
   108  	for arch, genFn := range arches {
   109  		f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
   110  		if err != nil {
   111  			log.Fatal(err)
   112  		}
   113  		g := gen{f, arch}
   114  		g.asmHeader()
   115  		genFn(&g)
   116  		if err := f.Close(); err != nil {
   117  			log.Fatal(err)
   118  		}
   119  	}
   120  }
   121  
// gen carries the destination writer and target architecture for one
// generated file.
type gen struct {
	w      io.Writer // where generated assembly (or Go source) is written
	goarch string    // target architecture name, a key of arches
}
   126  
   127  func (g *gen) commonHeader() {
   128  	fmt.Fprintf(g.w, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
   129  	if beLe[g.goarch] {
   130  		base := g.goarch[:len(g.goarch)-1]
   131  		fmt.Fprintf(g.w, "//go:build %s || %sle\n\n", base, base)
   132  	}
   133  }
   134  
   135  func (g *gen) asmHeader() {
   136  	g.commonHeader()
   137  	fmt.Fprintf(g.w, "#include \"go_asm.h\"\n")
   138  	if g.goarch == "amd64" {
   139  		fmt.Fprintf(g.w, "#include \"go_tls.h\"\n")
   140  		fmt.Fprintf(g.w, "#include \"asm_amd64.h\"\n")
   141  	}
   142  	fmt.Fprintf(g.w, "#include \"textflag.h\"\n\n")
   143  	fmt.Fprintf(g.w, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
   144  }
   145  
   146  func (g *gen) p(f string, args ...any) {
   147  	fmted := fmt.Sprintf(f, args...)
   148  	fmt.Fprintf(g.w, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
   149  }
   150  
   151  func (g *gen) label(l string) {
   152  	fmt.Fprintf(g.w, "%s\n", l)
   153  }
   154  
   155  // writeXRegs writes an architecture xregs file.
   156  func writeXRegs(arch string, l *layout) {
   157  	var code bytes.Buffer
   158  	g := gen{&code, arch}
   159  	g.commonHeader()
   160  	fmt.Fprintf(g.w, `
   161  package runtime
   162  
   163  type xRegs struct {
   164  `)
   165  	pos := 0
   166  	for _, seq := range l.regs {
   167  		for _, r := range seq.regs {
   168  			if r.pos != pos && !seq.fixedOffset {
   169  				log.Fatalf("padding not implemented")
   170  			}
   171  			typ := fmt.Sprintf("[%d]byte", r.size)
   172  			switch {
   173  			case r.size == 4 && r.pos%4 == 0:
   174  				typ = "uint32"
   175  			case r.size == 8 && r.pos%8 == 0:
   176  				typ = "uint64"
   177  			}
   178  			fmt.Fprintf(g.w, "\t%s %s\n", r.name, typ)
   179  			pos += r.size
   180  		}
   181  	}
   182  	fmt.Fprintf(g.w, "}\n")
   183  
   184  	path := fmt.Sprintf("preempt_%s.go", arch)
   185  	b, err := format.Source(code.Bytes())
   186  	if err != nil {
   187  		log.Fatalf("formatting %s: %s", path, err)
   188  	}
   189  	if err := os.WriteFile(path, b, 0666); err != nil {
   190  		log.Fatal(err)
   191  	}
   192  }
   193  
// layout describes one save area: the ordered save/restore operations
// and the running size of the frame they occupy.
type layout struct {
	stack int      // current frame size in bytes; next free offset
	regs  []regSeq // save/restore operations, in save order
	sp    string // stack pointer register
}
   199  
// regInfo describes a single register within a save/restore sequence.
type regInfo struct {
	size int    // register size in bytes
	name string // register name

	// Some register names may require a specific suffix.
	// In ARM64, a suffix called an "arrangement specifier" can be added to
	// a register name. For example:
	//
	//	V0.B16
	//
	// In this case, "V0" is the register name, and ".B16" is the suffix.
	suffix string

	pos int // position on stack
}
   215  
// Some save/restore operations can involve multiple registers in a single
// instruction. For example, the LDP/STP instructions in ARM64:
//
//	LDP 8(RSP), (R0, R1)
//	STP (R0, R1), 8(RSP)
//
// In these cases, a pair of registers (R0, R1) is used as a single argument.
type regSeq struct {
	saveOp    string    // instruction used to save (e.g. MOVQ, STP)
	restoreOp string    // instruction used to restore (e.g. MOVQ, LDP)
	regs      []regInfo // registers handled by one instruction

	// By default, all registers are saved on the stack, and the stack pointer offset
	// is calculated based on the size of each register. For example (ARM64):
	//
	//   STP (R0, R1), 8(RSP)
	//   STP (R2, R3), 24(RSP)
	//
	// However, automatic offset calculation may not always be desirable.
	// In some cases, the offset must remain fixed:
	//
	//   VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(R0)
	//   VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
	//
	// In this example, R0 is post-incremented after each instruction,
	// so the offset should not be recalculated. For such cases,
	// `fixedOffset` is set to true.
	fixedOffset bool

	// After conversion to a string, register names are separated by commas
	// and may be wrapped in a custom pair of brackets. For example (ARM64):
	//
	//   (R0, R1) // wrapped in parentheses
	//   [V0.B16, V1.B16, V2.B16, V3.B16] // wrapped in square brackets
	brackets [2]string

	// If this register requires special save and restore, these
	// give those operations with a %d placeholder for the stack
	// offset.
	save, restore string
}
   257  
   258  func (l *layout) add(op, regname string, size int) {
   259  	l.regs = append(l.regs, regSeq{saveOp: op, restoreOp: op, regs: []regInfo{{size, regname, "", l.stack}}})
   260  	l.stack += size
   261  }
   262  
   263  func (l *layout) add2(sop, rop string, regs []regInfo, brackets [2]string, fixedOffset bool) {
   264  	l.regs = append(l.regs, regSeq{saveOp: sop, restoreOp: rop, regs: regs, brackets: brackets, fixedOffset: fixedOffset})
   265  	if !fixedOffset {
   266  		for i := range regs {
   267  			regs[i].pos = l.stack
   268  			l.stack += regs[i].size
   269  		}
   270  	}
   271  }
   272  
   273  func (l *layout) addSpecial(save, restore string, size int) {
   274  	l.regs = append(l.regs, regSeq{save: save, restore: restore, regs: []regInfo{{size, "", "", l.stack}}})
   275  	l.stack += size
   276  }
   277  
   278  func (rs *regSeq) String() string {
   279  	switch len(rs.regs) {
   280  	case 0:
   281  		log.Fatal("Register sequence must not be empty!")
   282  	case 1:
   283  		return rs.regs[0].name
   284  	default:
   285  		names := make([]string, 0)
   286  		for _, r := range rs.regs {
   287  			name := r.name + r.suffix
   288  			names = append(names, name)
   289  		}
   290  		return rs.brackets[0] + strings.Join(names, ", ") + rs.brackets[1]
   291  	}
   292  	return ""
   293  }
   294  
   295  func (l *layout) save(g *gen) {
   296  	for _, seq := range l.regs {
   297  		if len(seq.regs) < 1 {
   298  			log.Fatal("Register sequence must not be empty!")
   299  		}
   300  		// When dealing with a sequence of registers, we assume that only the position
   301  		// of the first register is relevant. For example:
   302  		//
   303  		//   STP (R0, R1), 8(RSP)
   304  		//   STP (R2, R3), 24(RSP)
   305  		//
   306  		// Here, R0.pos is 8. While we can infer that R1.pos is 16, it doesn't need to
   307  		// be explicitly specified, as the STP instruction calculates it automatically.
   308  		pos := seq.regs[0].pos
   309  		if seq.save != "" {
   310  			g.p(seq.save, pos)
   311  		} else {
   312  			name := seq.String()
   313  			g.p("%s %s, %d(%s)", seq.saveOp, name, pos, l.sp)
   314  		}
   315  	}
   316  }
   317  
   318  func (l *layout) restoreInOrder(g *gen, reverse bool) {
   319  	var seq []regSeq
   320  	if reverse {
   321  		seq = make([]regSeq, 0)
   322  		for i := len(l.regs) - 1; i >= 0; i-- {
   323  			seq = append(seq, l.regs[i])
   324  		}
   325  	} else {
   326  		seq = l.regs
   327  	}
   328  	for _, reg := range seq {
   329  		if len(reg.regs) < 1 {
   330  			log.Fatal("Register sequence must not be empty!")
   331  		}
   332  		pos := reg.regs[0].pos
   333  		if reg.restore != "" {
   334  			g.p(reg.restore, pos)
   335  		} else {
   336  			g.p("%s %d(%s), %s", reg.restoreOp, pos, l.sp, reg.String())
   337  		}
   338  	}
   339  }
   340  
// restore emits restores in the reverse of save order, the normal
// pairing for stack-based save areas.
func (l *layout) restore(g *gen) {
	l.restoreInOrder(g, true)
}
   344  
// restoreDirect emits restores in the same order as saves. Needed when
// the save area is walked with post-increment addressing (e.g. ARM64
// VLD1.P), where reverse order would read the wrong slots.
func (l *layout) restoreDirect(g *gen) {
	l.restoreInOrder(g, false)
}
   348  
   349  func gen386(g *gen) {
   350  	p := g.p
   351  
   352  	p("PUSHFL")
   353  	// Save general purpose registers.
   354  	var l = layout{sp: "SP"}
   355  	for _, reg := range regNames386 {
   356  		if reg == "SP" || strings.HasPrefix(reg, "X") {
   357  			continue
   358  		}
   359  		l.add("MOVL", reg, 4)
   360  	}
   361  
   362  	softfloat := "GO386_softfloat"
   363  
   364  	// Save SSE state only if supported.
   365  	lSSE := layout{stack: l.stack, sp: "SP"}
   366  	for i := 0; i < 8; i++ {
   367  		lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
   368  	}
   369  
   370  	p("ADJSP $%d", lSSE.stack)
   371  	p("NOP SP")
   372  	l.save(g)
   373  	p("#ifndef %s", softfloat)
   374  	lSSE.save(g)
   375  	p("#endif")
   376  	p("CALL ·asyncPreempt2(SB)")
   377  	p("#ifndef %s", softfloat)
   378  	lSSE.restore(g)
   379  	p("#endif")
   380  	l.restore(g)
   381  	p("ADJSP $%d", -lSSE.stack)
   382  
   383  	p("POPFL")
   384  	p("RET")
   385  }
   386  
// genAMD64 emits asyncPreempt for amd64. GPs are saved on the stack;
// extended state (X/Y/Z vector registers plus K mask registers) is saved
// into p.xRegs.scratch through the pointer in AX, using the widest save
// the CPU supports (SSE, AVX2, or AVX-512), and restored from
// *p.xRegs.cache afterwards.
func genAMD64(g *gen) {
	const xReg = "AX" // *xRegState

	p, label := g.p, g.label

	// Assign stack offsets.
	var l = layout{sp: "SP"}
	for _, reg := range regNamesAMD64 {
		if reg == "SP" || reg == "BP" {
			continue
		}
		if !strings.HasPrefix(reg, "X") {
			l.add("MOVQ", reg, 8)
		}
	}
	// Create layouts for X, Y, and Z registers.
	const (
		numXRegs = 16
		numZRegs = 32
		numKRegs = 8
	)
	lZRegs := layout{sp: xReg} // Non-GP registers
	lXRegs, lYRegs := lZRegs, lZRegs
	for i := range numZRegs {
		lZRegs.add("VMOVDQU64", fmt.Sprintf("Z%d", i), 512/8)
		if i < numXRegs {
			// Use SSE-only instructions for X registers.
			lXRegs.add("MOVUPS", fmt.Sprintf("X%d", i), 128/8)
			lYRegs.add("VMOVDQU", fmt.Sprintf("Y%d", i), 256/8)
		}
	}
	for i := range numKRegs {
		lZRegs.add("KMOVQ", fmt.Sprintf("K%d", i), 8)
	}
	// The Z layout is the most general, so we line up the others with that one.
	// We don't have to do this, but it results in a nice Go type. If we split
	// this into multiple types, we probably should stop doing this.
	for i := range lXRegs.regs {
		for j := range lXRegs.regs[i].regs {
			lXRegs.regs[i].regs[j].pos = lZRegs.regs[i].regs[j].pos
			lYRegs.regs[i].regs[j].pos = lZRegs.regs[i].regs[j].pos
		}

	}
	writeXRegs(g.goarch, &lZRegs)

	p("PUSHQ BP")
	p("MOVQ SP, BP")
	p("// Save flags before clobbering them")
	p("PUSHFQ")
	p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
	p("ADJSP $%d", l.stack)
	p("// But vet doesn't know ADJSP, so suppress vet stack checking")
	p("NOP SP")

	p("// Save GPs")
	l.save(g)

	// In general, the limitations on asynchronous preemption mean we only
	// preempt in ABIInternal code. However, there's at least one exception to
	// this: when we're in an open-coded transition between an ABIInternal
	// function and an ABI0 call. We could more carefully arrange unsafe points
	// to avoid ever landing in ABI0, but it's easy to just make this code not
	// sensitive to the ABI we're preempting. The CALL to asyncPreempt2 will
	// ensure we're in ABIInternal register state.
	p("// Save extended register state to p.xRegs.scratch")
	p("// Don't make assumptions about ABI register state. See mkpreempt.go")
	// Walk g.m.p via TLS: R14 holds g, AX ends up pointing at the
	// scratch save area.
	p("get_tls(CX)")
	p("MOVQ g(CX), R14")
	p("MOVQ g_m(R14), %s", xReg)
	p("MOVQ m_p(%s), %s", xReg, xReg)
	p("LEAQ (p_xRegs+xRegPerP_scratch)(%s), %s", xReg, xReg)

	// Which registers do we need to save?
	p("#ifdef GOEXPERIMENT_simd")
	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX512(SB), $1")
	p("JE saveAVX512")
	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1")
	p("JE saveAVX2")
	p("#endif")

	// No features. Assume only SSE.
	label("saveSSE:")
	lXRegs.save(g)
	p("JMP preempt")

	label("saveAVX2:")
	lYRegs.save(g)
	p("JMP preempt")

	label("saveAVX512:")
	lZRegs.save(g)
	p("JMP preempt")

	label("preempt:")
	p("CALL ·asyncPreempt2(SB)")

	p("// Restore non-GPs from *p.xRegs.cache")
	// R14 still holds g from before the call; re-derive the cache pointer.
	p("MOVQ g_m(R14), %s", xReg)
	p("MOVQ m_p(%s), %s", xReg, xReg)
	p("MOVQ (p_xRegs+xRegPerP_cache)(%s), %s", xReg, xReg)

	p("#ifdef GOEXPERIMENT_simd")
	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX512(SB), $1")
	p("JE restoreAVX512")
	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1")
	p("JE restoreAVX2")
	p("#endif")

	label("restoreSSE:")
	lXRegs.restore(g)
	p("JMP restoreGPs")

	label("restoreAVX2:")
	lYRegs.restore(g)
	p("JMP restoreGPs")

	label("restoreAVX512:")
	lZRegs.restore(g)
	p("JMP restoreGPs")

	label("restoreGPs:")
	p("// Restore GPs")
	l.restore(g)
	p("ADJSP $%d", -l.stack)
	p("POPFQ")
	p("POPQ BP")
	p("RET")
}
   516  
// genARM emits asyncPreempt for arm: GPs R0-R12 (minus the g register
// R10), the CPSR flags, and — unless goarmsoftfp is set at runtime —
// FPCR plus F0-F15.
func genARM(g *gen) {
	p := g.p

	// Add integer registers R0-R12.
	// R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
	var l = layout{sp: "R13", stack: 4} // add LR slot
	for i := 0; i <= 12; i++ {
		reg := fmt.Sprintf("R%d", i)
		if i == 10 {
			continue // R10 is g register, no need to save/restore
		}
		l.add("MOVW", reg, 4)
	}
	// Add flag register.
	l.addSpecial(
		"MOVW CPSR, R0\nMOVW R0, %d(R13)",
		"MOVW %d(R13), R0\nMOVW R0, CPSR",
		4)

	// Add floating point registers F0-F15 and flag register.
	var lfp = layout{stack: l.stack, sp: "R13"}
	lfp.addSpecial(
		"MOVW FPCR, R0\nMOVW R0, %d(R13)",
		"MOVW %d(R13), R0\nMOVW R0, FPCR",
		4)
	for i := 0; i <= 15; i++ {
		reg := fmt.Sprintf("F%d", i)
		lfp.add("MOVD", reg, 8)
	}

	p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
	l.save(g)
	p("MOVB ·goarmsoftfp(SB), R0\nCMP $0, R0\nBNE nofp") // test goarmsoftfp, and skip FP registers if goarmsoftfp!=0.
	lfp.save(g)
	g.label("nofp:")
	p("CALL ·asyncPreempt2(SB)")
	p("MOVB ·goarmsoftfp(SB), R0\nCMP $0, R0\nBNE nofp2") // test goarmsoftfp, and skip FP registers if goarmsoftfp!=0.
	lfp.restore(g)
	g.label("nofp2:")
	l.restore(g)

	p("MOVW %d(R13), R14", lfp.stack)     // sigctxt.pushCall pushes LR on stack, restore it
	p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
	p("UNDEF")                            // shouldn't get here
}
   562  
// genARM64 emits asyncPreempt for arm64: GP pairs via STP/LDP plus the
// NZCV and FPSR flags on the stack, and V0-V31 saved into
// p.xRegs.scratch via post-increment VST1.P through the pointer in R0
// (restored in the same order with VLD1.P via restoreDirect).
func genARM64(g *gen) {
	const vReg = "R0" // *xRegState
	p := g.p
	// Add integer registers R0-R26
	// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
	// and not saved here.
	var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
	for i := 0; i < 26; i += 2 {
		if i == 18 {
			i--
			continue // R18 is not used, skip
		}
		regs := []regInfo{
			{name: fmt.Sprintf("R%d", i), size: 8},
			{name: fmt.Sprintf("R%d", i+1), size: 8},
		}
		l.add2("STP", "LDP", regs, [2]string{"(", ")"}, false)
	}
	// Add flag registers.
	l.addSpecial(
		"MOVD NZCV, R0\nMOVD R0, %d(RSP)",
		"MOVD %d(RSP), R0\nMOVD R0, NZCV",
		8)
	l.addSpecial(
		"MOVD FPSR, R0\nMOVD R0, %d(RSP)",
		"MOVD %d(RSP), R0\nMOVD R0, FPSR",
		8)
	// TODO: FPCR? I don't think we'll change it, so no need to save.
	// Add floating point registers F0-F31.
	lVRegs := layout{sp: vReg} // Non-GP registers
	for i := 0; i < 31; i += 4 {
		// pos is the fixed 64-byte post-increment of VST1.P/VLD1.P, so
		// offsets are not recomputed (fixedOffset is true below).
		regs := []regInfo{
			{name: fmt.Sprintf("V%d", i), suffix: ".B16", size: 16, pos: 64},
			{name: fmt.Sprintf("V%d", i+1), suffix: ".B16", size: 16, pos: 64},
			{name: fmt.Sprintf("V%d", i+2), suffix: ".B16", size: 16, pos: 64},
			{name: fmt.Sprintf("V%d", i+3), suffix: ".B16", size: 16, pos: 64},
		}
		lVRegs.add2("VST1.P", "VLD1.P", regs, [2]string{"[", "]"}, true)
	}
	writeXRegs(g.goarch, &lVRegs)
	if l.stack%16 != 0 {
		l.stack += 8 // SP needs 16-byte alignment
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p("MOVD R30, %d(RSP)", -l.stack)
	p("SUB $%d, RSP", l.stack)
	p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
	p("SUB $8, RSP, R29")  // set up new frame pointer
	// On iOS, save the LR again after decrementing SP. We run the
	// signal handler on the G stack (as it doesn't support sigaltstack),
	// so any writes below SP may be clobbered.
	p("#ifdef GOOS_ios")
	p("MOVD R30, (RSP)")
	p("#endif")

	p("// Save GPs")
	l.save(g)
	p("// Save extended register state to p.xRegs.scratch")
	p("MOVD g_m(g), %s", vReg)
	p("MOVD m_p(%s), %s", vReg, vReg)
	p("ADD $(p_xRegs+xRegPerP_scratch), %s, %s", vReg, vReg)
	lVRegs.save(g)
	p("CALL ·asyncPreempt2(SB)")
	p("// Restore non-GPs from *p.xRegs.cache")
	p("MOVD g_m(g), %s", vReg)
	p("MOVD m_p(%s), %s", vReg, vReg)
	p("MOVD (p_xRegs+xRegPerP_cache)(%s), %s", vReg, vReg)
	lVRegs.restoreDirect(g)
	p("// Restore GPs")
	l.restore(g)

	p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p("MOVD -8(RSP), R29")          // restore frame pointer
	p("MOVD (RSP), R27")            // load PC to REGTMP
	p("ADD $%d, RSP", l.stack+16)   // pop frame (including the space pushed by sigctxt.pushCall)
	p("RET (R27)")
}
   641  
// genMIPS emits asyncPreempt for mipsx or, when _64bit is set, mips64x
// (the instruction mnemonics and register size switch accordingly):
// integer registers, HI/LO, and — unless built softfloat — FCR31 plus
// the FP registers.
func genMIPS(g *gen, _64bit bool) {
	p := g.p

	mov := "MOVW"
	movf := "MOVF"
	add := "ADD"
	sub := "SUB"
	r28 := "R28"
	regsize := 4
	softfloat := "GOMIPS_softfloat"
	if _64bit {
		mov = "MOVV"
		movf = "MOVD"
		add = "ADDV"
		sub = "SUBV"
		r28 = "RSB"
		regsize = 8
		softfloat = "GOMIPS64_softfloat"
	}

	// Add integer registers R1-R22, R24-R25, R28
	// R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
	// and not saved here. R26 and R27 are reserved by kernel and not used.
	var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
	for i := 1; i <= 25; i++ {
		if i == 23 {
			continue // R23 is REGTMP
		}
		reg := fmt.Sprintf("R%d", i)
		l.add(mov, reg, regsize)
	}
	l.add(mov, r28, regsize)
	// HI and LO multiply/divide result registers, staged through R1.
	l.addSpecial(
		mov+" HI, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, HI",
		regsize)
	l.addSpecial(
		mov+" LO, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, LO",
		regsize)

	// Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
	var lfp = layout{sp: "R29", stack: l.stack}
	lfp.addSpecial(
		mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
		mov+" %d(R29), R1\n"+mov+" R1, FCR31",
		regsize)
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		lfp.add(movf, reg, regsize)
	}

	// allocate frame, save PC of interrupted instruction (in LR)
	p(mov+" R31, -%d(R29)", lfp.stack)
	p(sub+" $%d, R29", lfp.stack)

	l.save(g)
	p("#ifndef %s", softfloat)
	lfp.save(g)
	p("#endif")
	p("CALL ·asyncPreempt2(SB)")
	p("#ifndef %s", softfloat)
	lfp.restore(g)
	p("#endif")
	l.restore(g)

	p(mov+" %d(R29), R31", lfp.stack)     // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p(mov + " (R29), R23")                // load PC to REGTMP
	p(add+" $%d, R29", lfp.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R23)")
}
   714  
// genLoong64 emits asyncPreempt for loong64: GPs and the FCC0-FCC7
// condition flags (packed into one word via BSTRINSV/BSTRPICKV) on the
// stack, and the widest supported vector state — LASX (X regs), LSX
// (V regs), or plain FP — saved into p.xRegs.scratch through the
// pointer in R4 and restored from *p.xRegs.cache.
func genLoong64(g *gen) {
	const xReg = "R4" // *xRegState

	p, label := g.p, g.label

	mov := "MOVV"
	add := "ADDV"
	sub := "SUBV"
	regsize := 8

	// Add integer registers r4-r21 r23-r29 r31
	// R0 (zero), R30 (REGTMP), R2 (tp), R3 (SP), R22 (g), R1 (LR) are special,
	var l = layout{sp: "R3", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
	for i := 4; i <= 31; i++ {
		if i == 22 || i == 30 {
			continue
		}
		reg := fmt.Sprintf("R%d", i)
		l.add(mov, reg, regsize)
	}

	// Add condition flag register fcc0-fcc7
	// Each FCCi is copied through R4 into an 8-bit field of R5, so all
	// eight flags save/restore as a single stack slot.
	sv := ""
	rs := ""
	last := 7
	for i := 0; i <= last; i++ {
		msb := 7 + (i * 8)
		lsb := 0 + (i * 8)

		// MOVV FCCx, R4,
		// BSTRINSV $msb, R4, $lsb, R5
		sv += fmt.Sprintf("%s FCC%d, R4\n", mov, i)
		sv += fmt.Sprintf("BSTRINSV $%d, R4, $%d, R5\n", msb, lsb)

		// BSTRPICKV $msb, R5, $lsb, R4
		// MOVV R4, FCCx
		rs += fmt.Sprintf("BSTRPICKV $%d, R5, $%d, R4\n", msb, lsb)
		rs += fmt.Sprintf("%s R4, FCC%d", mov, i)
		if i != last {
			rs += fmt.Sprintf("\n")
		}
	}
	l.addSpecial(
		sv+mov+" R5, %d(R3)",
		mov+" %d(R3), R5\n"+rs,
		regsize)

	// Create layouts for lasx, lsx and fp registers.
	lasxRegs := layout{sp: xReg}
	lsxRegs := lasxRegs
	fpRegs := lasxRegs
	for i := 0; i <= 31; i++ {
		lasxRegs.add("XVMOVQ", fmt.Sprintf("X%d", i), 256/8)
		lsxRegs.add("VMOVQ", fmt.Sprintf("V%d", i), 128/8)
		fpRegs.add("MOVD", fmt.Sprintf("F%d", i), 64/8)
	}

	// Line the narrower layouts up with the widest (LASX) one so all
	// three share the offsets of the generated xRegs struct.
	for i := range lsxRegs.regs {
		for j := range lsxRegs.regs[i].regs {
			lsxRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
			fpRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
		}
	}
	writeXRegs(g.goarch, &lasxRegs)

	// allocate frame, save PC of interrupted instruction (in LR)
	p(mov+" R1, -%d(R3)", l.stack)
	p(sub+" $%d, R3", l.stack)

	p("// Save GPs")
	l.save(g)

	p("// Save extended register state to p.xRegs.scratch")
	p("MOVV g_m(g), %s", xReg)
	p("MOVV m_p(%s), %s", xReg, xReg)
	p("ADDV $(p_xRegs+xRegPerP_scratch), %s, %s", xReg, xReg)

	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
	p("BNE R5, saveLASX")

	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
	p("BNE R5, saveLSX")

	label("saveFP:")
	fpRegs.save(g)
	p("JMP preempt")

	label("saveLSX:")
	lsxRegs.save(g)
	p("JMP preempt")

	label("saveLASX:")
	lasxRegs.save(g)

	label("preempt:")
	p("CALL ·asyncPreempt2(SB)")

	p("// Restore non-GPs from *p.xRegs.cache")
	p("MOVV g_m(g), %s", xReg)
	p("MOVV m_p(%s), %s", xReg, xReg)
	p("MOVV (p_xRegs+xRegPerP_cache)(%s), %s", xReg, xReg)

	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
	p("BNE R5, restoreLASX")

	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
	p("BNE R5, restoreLSX")

	label("restoreFP:")
	fpRegs.restore(g)
	p("JMP restoreGPs")

	label("restoreLSX:")
	lsxRegs.restore(g)
	p("JMP restoreGPs")

	label("restoreLASX:")
	lasxRegs.restore(g)

	p("// Restore GPs")
	label("restoreGPs:")
	l.restore(g)

	p(mov+" %d(R3), R1", l.stack)      // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p(mov + " (R3), R30")              // load PC to REGTMP
	p(add+" $%d, R3", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (R30)")
}
   843  
// genPPC64 emits asyncPreempt for ppc64x: R3-R29 (minus R12 and R13),
// CR, XER, F0-F31 and FPSCR, all within a frame that keeps the fixed
// 32-byte MinFrameSize area at the bottom. R31 is staged manually as a
// scratch register before/after the layout-driven saves.
func genPPC64(g *gen) {
	p := g.p

	// Add integer registers R3-R29
	// R0 (zero), R1 (SP), R30 (g) are special and not saved here.
	// R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
	// R31 (REGTMP) will be saved manually.
	var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
	for i := 3; i <= 29; i++ {
		if i == 12 || i == 13 {
			// R12 has been saved in sigctxt.pushCall.
			// R13 is TLS pointer, not used by Go code. we must NOT
			// restore it, otherwise if we parked and resumed on a
			// different thread we'll mess up TLS addresses.
			continue
		}
		reg := fmt.Sprintf("R%d", i)
		l.add("MOVD", reg, 8)
	}
	l.addSpecial(
		"MOVW CR, R31\nMOVW R31, %d(R1)",
		"MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
		8)                                    // CR is 4-byte wide, but just keep the alignment
	l.addSpecial(
		"MOVD XER, R31\nMOVD R31, %d(R1)",
		"MOVD %d(R1), R31\nMOVD R31, XER",
		8)
	// Add floating point registers F0-F31.
	for i := 0; i <= 31; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}
	// Add floating point control/status register FPSCR.
	l.addSpecial(
		"MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
		"FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
		8)

	p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
	p("MOVD LR, R31")
	p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)

	l.save(g)
	p("CALL ·asyncPreempt2(SB)")
	l.restore(g)

	p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
	p("MOVD R31, LR")
	p("MOVD %d(R1), R2", l.stack+8)
	p("MOVD %d(R1), R12", l.stack+16)
	p("MOVD (R1), R31") // load PC to CTR
	p("MOVD R31, CTR")
	p("MOVD 32(R1), R31")        // restore R31
	p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
	p("JMP (CTR)")
}
   900  
   901  func genRISCV64(g *gen) {
   902  	p := g.p
   903  
   904  	// X0 (zero), X1 (LR), X2 (SP), X3 (GP), X4 (TP), X27 (g), X31 (TMP) are special.
   905  	var l = layout{sp: "X2", stack: 8}
   906  
   907  	// Add integer registers (X5-X26, X28-30).
   908  	for i := 5; i < 31; i++ {
   909  		if i == 27 {
   910  			continue
   911  		}
   912  		reg := fmt.Sprintf("X%d", i)
   913  		l.add("MOV", reg, 8)
   914  	}
   915  
   916  	// Add floating point registers (F0-F31).
   917  	for i := 0; i <= 31; i++ {
   918  		reg := fmt.Sprintf("F%d", i)
   919  		l.add("MOVD", reg, 8)
   920  	}
   921  
   922  	p("MOV X1, -%d(X2)", l.stack)
   923  	p("SUB $%d, X2", l.stack)
   924  	l.save(g)
   925  	p("CALL ·asyncPreempt2(SB)")
   926  	l.restore(g)
   927  	p("MOV %d(X2), X1", l.stack)
   928  	p("MOV (X2), X31")
   929  	p("ADD $%d, X2", l.stack+8)
   930  	p("JMP (X31)")
   931  }
   932  
// genS390X emits asyncPreempt for s390x: R0-R12 with a single
// store/load-multiple pair, F0-F15, plus the condition code captured
// with IPM into R10 before ADD clobbers it.
func genS390X(g *gen) {
	p := g.p

	// Add integer registers R0-R12
	// R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
	// Saving R10 (REGTMP) is not necessary, but it is saved anyway.
	var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
	l.addSpecial(
		"STMG R0, R12, %d(R15)",
		"LMG %d(R15), R0, R12",
		13*8)
	// Add floating point registers F0-F15.
	for i := 0; i <= 15; i++ {
		reg := fmt.Sprintf("F%d", i)
		l.add("FMOVD", reg, 8)
	}

	// allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
	p("IPM R10") // save flags upfront, as ADD will clobber flags
	p("MOVD R14, -%d(R15)", l.stack)
	p("ADD $-%d, R15", l.stack)
	p("MOVW R10, 8(R15)") // save flags

	l.save(g)
	p("CALL ·asyncPreempt2(SB)")
	l.restore(g)

	p("MOVD %d(R15), R14", l.stack)    // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
	p("ADD $%d, R15", l.stack+8)       // pop frame (including the space pushed by sigctxt.pushCall)
	p("MOVWZ -%d(R15), R10", l.stack)  // load flags to REGTMP
	p("TMLH R10, $(3<<12)")            // restore flags
	p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
	p("JMP (R10)")
}
   967  
   968  func genWasm(g *gen) {
   969  	p := g.p
   970  	p("// No async preemption on wasm")
   971  	p("UNDEF")
   972  }
   973  
   974  func notImplemented(g *gen) {
   975  	p := g.p
   976  	p("// Not implemented yet")
   977  	p("JMP ·abort(SB)")
   978  }
   979  

// View as plain text