Source file src/cmd/compile/internal/ssa/branchelim.go

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import "cmd/internal/src"
     8  
     9  // branchelim tries to eliminate branches by
    10  // generating CondSelect instructions.
    11  //
    12  // Search for basic blocks that look like
    13  //
    14  //	bb0            bb0
    15  //	 | \          /   \
    16  //	 | bb1  or  bb1   bb2    <- trivial if/else blocks
    17  //	 | /          \   /
    18  //	bb2            bb3
    19  //
    20  // where the intermediate blocks are mostly empty (with no side-effects);
    21  // rewrite Phis in the postdominator as CondSelects.
    22  func branchelim(f *Func) {
    23  	// FIXME: add support for lowering CondSelects on more architectures
    24  	if !f.Config.haveCondSelect {
    25  		return
    26  	}
    27  
    28  	// Find all the values used in computing the address of any load.
    29  	// Typically these values have operations like AddPtr, Lsh64x64, etc.
    30  	loadAddr := f.newSparseSet(f.NumValues())
    31  	defer f.retSparseSet(loadAddr)
    32  	for _, b := range f.Blocks {
    33  		for _, v := range b.Values {
    34  			switch v.Op {
    35  			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
    36  				loadAddr.add(v.Args[0].ID)
    37  			case OpMove:
    38  				loadAddr.add(v.Args[1].ID)
    39  			}
    40  		}
    41  	}
    42  	po := f.postorder()
    43  	for {
    44  		n := loadAddr.size()
    45  		for _, b := range po {
    46  			for i := len(b.Values) - 1; i >= 0; i-- {
    47  				v := b.Values[i]
    48  				if !loadAddr.contains(v.ID) {
    49  					continue
    50  				}
    51  				for _, a := range v.Args {
    52  					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
    53  						loadAddr.add(a.ID)
    54  					}
    55  				}
    56  			}
    57  		}
    58  		if loadAddr.size() == n {
    59  			break
    60  		}
    61  	}
    62  
    63  	change := true
    64  	for change {
    65  		change = false
    66  		for _, b := range f.Blocks {
    67  			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
    68  		}
    69  	}
    70  }
    71  
    72  func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
    73  	if loadAddr != nil && // prove calls this on some multiplies and doesn't take care of loadAddrs
    74  		loadAddr.contains(v.ID) {
    75  		// The result of the soon-to-be conditional move is used to compute a load address.
    76  		// We want to avoid generating a conditional move in this case
    77  		// because the load address would now be data-dependent on the condition.
    78  		// Previously it would only be control-dependent on the condition, which is faster
    79  		// if the branch predicts well (or possibly even if it doesn't, if the load will
    80  		// be an expensive cache miss).
    81  		// See issue #26306.
    82  		return false
    83  	}
    84  	if arch == "loong64" {
    85  		// We should not generate conditional moves if neither of the arguments is constant zero,
    86  		// because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the
    87  		// register pressure.
    88  		if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) &&
    89  			!(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) {
    90  			return false
    91  		}
    92  	}
    93  	// For now, stick to simple scalars that fit in registers
    94  	switch {
    95  	case v.Type.Size() > v.Block.Func.Config.RegSize:
    96  		return false
    97  	case v.Type.IsPtrShaped():
    98  		return true
    99  	case v.Type.IsInteger():
   100  		if arch == "amd64" && v.Type.Size() < 2 {
   101  			// amd64 doesn't support CMOV with byte registers
   102  			return false
   103  		}
   104  		return true
   105  	default:
   106  		return false
   107  	}
   108  }
   109  
   110  // elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
   111  // loadAddr is a set of values which are used to compute the address of a load.
   112  // Those values are exempt from CMOV generation.
   113  func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
   114  	// See if dom is an If with one arm that
   115  	// is trivial and succeeded by the other
   116  	// successor of dom.
   117  	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
   118  		return false
   119  	}
   120  	var simple, post *Block
   121  	for i := range dom.Succs {
   122  		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
   123  		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
   124  			simple = bb
   125  			post = other
   126  			break
   127  		}
   128  	}
   129  	if simple == nil || len(post.Preds) != 2 || post == dom {
   130  		return false
   131  	}
   132  
   133  	// We've found our diamond CFG of blocks.
   134  	// Now decide if fusing 'simple' into dom+post
   135  	// looks profitable.
   136  
   137  	// Check that there are Phis, and that all of them
   138  	// can be safely rewritten to CondSelect.
   139  	hasphis := false
   140  	for _, v := range post.Values {
   141  		if v.Op == OpPhi {
   142  			hasphis = true
   143  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   144  				return false
   145  			}
   146  		}
   147  	}
   148  	if !hasphis {
   149  		return false
   150  	}
   151  
   152  	// Pick some upper bound for the number of instructions
   153  	// we'd be willing to execute just to generate a dead
   154  	// argument to CondSelect. In the worst case, this is
   155  	// the number of useless instructions executed.
   156  	const maxfuseinsts = 2
   157  
   158  	if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
   159  		return false
   160  	}
   161  
   162  	// Replace Phi instructions in b with CondSelect instructions
   163  	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
   164  	for _, v := range post.Values {
   165  		if v.Op != OpPhi {
   166  			continue
   167  		}
   168  		v.Op = OpCondSelect
   169  		if swap {
   170  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   171  		}
   172  		v.AddArg(dom.Controls[0])
   173  	}
   174  
   175  	// Put all of the instructions into 'dom'
   176  	// and update the CFG appropriately.
   177  	dom.Kind = post.Kind
   178  	dom.CopyControls(post)
   179  	dom.Aux = post.Aux
   180  	dom.Succs = append(dom.Succs[:0], post.Succs...)
   181  	for i := range dom.Succs {
   182  		e := dom.Succs[i]
   183  		e.b.Preds[e.i].b = dom
   184  	}
   185  
   186  	// Try really hard to preserve statement marks attached to blocks.
   187  	simplePos := simple.Pos
   188  	postPos := post.Pos
   189  	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
   190  	postStmt := postPos.IsStmt() == src.PosIsStmt
   191  
   192  	for _, v := range simple.Values {
   193  		v.Block = dom
   194  	}
   195  	for _, v := range post.Values {
   196  		v.Block = dom
   197  	}
   198  
   199  	// findBlockPos determines if b contains a stmt-marked value
   200  	// that has the same line number as the Pos for b itself.
   201  	// (i.e. is the position on b actually redundant?)
   202  	findBlockPos := func(b *Block) bool {
   203  		pos := b.Pos
   204  		for _, v := range b.Values {
   205  			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
   206  			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
   207  				return true
   208  			}
   209  		}
   210  		return false
   211  	}
   212  	if simpleStmt {
   213  		simpleStmt = !findBlockPos(simple)
   214  		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
   215  			postStmt = false
   216  		}
   217  
   218  	}
   219  	if postStmt {
   220  		postStmt = !findBlockPos(post)
   221  	}
   222  
   223  	// If simpleStmt and/or postStmt are still true, then try harder
   224  	// to find the corresponding statement marks new homes.
   225  
   226  	// setBlockPos determines if b contains a can-be-statement value
   227  	// that has the same line number as the Pos for b itself, and
   228  	// puts a statement mark on it, and returns whether it succeeded
   229  	// in this operation.
   230  	setBlockPos := func(b *Block) bool {
   231  		pos := b.Pos
   232  		for _, v := range b.Values {
   233  			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
   234  				v.Pos = v.Pos.WithIsStmt()
   235  				return true
   236  			}
   237  		}
   238  		return false
   239  	}
   240  	// If necessary and possible, add a mark to a value in simple
   241  	if simpleStmt {
   242  		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
   243  			postStmt = false
   244  		}
   245  	}
   246  	// If necessary and possible, add a mark to a value in post
   247  	if postStmt {
   248  		postStmt = !setBlockPos(post)
   249  	}
   250  
   251  	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
   252  	// try the values in the successor block if it is uncomplicated.
   253  	if postStmt {
   254  		if dom.Pos.IsStmt() != src.PosIsStmt {
   255  			dom.Pos = postPos
   256  		} else {
   257  			// Try the successor block
   258  			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
   259  				succ := dom.Succs[0].Block()
   260  				for _, v := range succ.Values {
   261  					if isPoorStatementOp(v.Op) {
   262  						continue
   263  					}
   264  					if postPos.SameFileAndLine(v.Pos) {
   265  						v.Pos = v.Pos.WithIsStmt()
   266  					}
   267  					postStmt = false
   268  					break
   269  				}
   270  				// If postStmt still true, tag the block itself if possible
   271  				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
   272  					succ.Pos = postPos
   273  				}
   274  			}
   275  		}
   276  	}
   277  
   278  	dom.Values = append(dom.Values, simple.Values...)
   279  	dom.Values = append(dom.Values, post.Values...)
   280  
   281  	// Trash 'post' and 'simple'
   282  	clobberBlock(post)
   283  	clobberBlock(simple)
   284  
   285  	f.invalidateCFG()
   286  	return true
   287  }
   288  
   289  // is this a BlockPlain with one predecessor?
   290  func isLeafPlain(b *Block) bool {
   291  	return b.Kind == BlockPlain && len(b.Preds) == 1
   292  }
   293  
   294  func clobberBlock(b *Block) {
   295  	b.Values = nil
   296  	b.Preds = nil
   297  	b.Succs = nil
   298  	b.Aux = nil
   299  	b.ResetControls()
   300  	b.Likely = BranchUnknown
   301  	b.Kind = BlockInvalid
   302  }
   303  
   304  // elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
   305  // loadAddr is a set of values which are used to compute the address of a load.
   306  // Those values are exempt from CMOV generation.
   307  func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
   308  	// See if 'b' ends in an if/else: it should
   309  	// have two successors, both of which are BlockPlain
   310  	// and succeeded by the same block.
   311  	if b.Kind != BlockIf || b.Likely != BranchUnknown {
   312  		return false
   313  	}
   314  	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
   315  	if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
   316  		return false
   317  	}
   318  	if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
   319  		return false
   320  	}
   321  	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
   322  		return false
   323  	}
   324  	// block that postdominates the if/else
   325  	post := b.Succs[0].Block().Succs[0].Block()
   326  	if len(post.Preds) != 2 || post == b {
   327  		return false
   328  	}
   329  	hasphis := false
   330  	for _, v := range post.Values {
   331  		if v.Op == OpPhi {
   332  			hasphis = true
   333  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   334  				return false
   335  			}
   336  		}
   337  	}
   338  	if !hasphis {
   339  		return false
   340  	}
   341  
   342  	// Don't generate CondSelects if branch is cheaper.
   343  	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
   344  		return false
   345  	}
   346  
   347  	// now we're committed: rewrite each Phi as a CondSelect
   348  	swap := post.Preds[0].Block() != b.Succs[0].Block()
   349  	for _, v := range post.Values {
   350  		if v.Op != OpPhi {
   351  			continue
   352  		}
   353  		v.Op = OpCondSelect
   354  		if swap {
   355  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   356  		}
   357  		v.AddArg(b.Controls[0])
   358  	}
   359  
   360  	// Move the contents of all of these
   361  	// blocks into 'b' and update CFG edges accordingly
   362  	b.Kind = post.Kind
   363  	b.CopyControls(post)
   364  	b.Aux = post.Aux
   365  	b.Succs = append(b.Succs[:0], post.Succs...)
   366  	for i := range b.Succs {
   367  		e := b.Succs[i]
   368  		e.b.Preds[e.i].b = b
   369  	}
   370  	for i := range post.Values {
   371  		post.Values[i].Block = b
   372  	}
   373  	for i := range yes.Values {
   374  		yes.Values[i].Block = b
   375  	}
   376  	for i := range no.Values {
   377  		no.Values[i].Block = b
   378  	}
   379  	b.Values = append(b.Values, yes.Values...)
   380  	b.Values = append(b.Values, no.Values...)
   381  	b.Values = append(b.Values, post.Values...)
   382  
   383  	// trash post, yes, and no
   384  	clobberBlock(yes)
   385  	clobberBlock(no)
   386  	clobberBlock(post)
   387  
   388  	f.invalidateCFG()
   389  	return true
   390  }
   391  
   392  // shouldElimIfElse reports whether estimated cost of eliminating branch
   393  // is lower than threshold.
   394  func shouldElimIfElse(no, yes, post *Block, arch string) bool {
   395  	switch arch {
   396  	default:
   397  		return true
   398  	case "amd64":
   399  		const maxcost = 2
   400  		phi := 0
   401  		other := 0
   402  		for _, v := range post.Values {
   403  			if v.Op == OpPhi {
   404  				// Each phi results in CondSelect, which lowers into CMOV,
   405  				// CMOV has latency >1 on most CPUs.
   406  				phi++
   407  			}
   408  			for _, x := range v.Args {
   409  				if x.Block == no || x.Block == yes {
   410  					other++
   411  				}
   412  			}
   413  		}
   414  		cost := phi * 1
   415  		if phi > 1 {
   416  			// If we have more than 1 phi and some values in post have args
   417  			// in yes or no blocks, we may have to recalculate condition, because
   418  			// those args may clobber flags. For now assume that all operations clobber flags.
   419  			cost += other * 1
   420  		}
   421  		return cost < maxcost
   422  	}
   423  }
   424  
   425  // canSpeculativelyExecute reports whether every value in the block can
   426  // be evaluated without causing any observable side effects (memory
   427  // accesses, panics and so on) except for execution time changes. It
   428  // also ensures that the block does not contain any phis which we can't
   429  // speculatively execute.
   430  // Warning: this function cannot currently detect values that represent
   431  // instructions the execution of which need to be guarded with CPU
   432  // hardware feature checks. See issue #34950.
   433  func canSpeculativelyExecute(b *Block) bool {
   434  	// don't fuse memory ops, Phi ops, divides (can panic),
   435  	// or anything else with side-effects
   436  	for _, v := range b.Values {
   437  		if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) ||
   438  			v.Type.IsMemory() || opcodeTable[v.Op].hasSideEffects {
   439  			return false
   440  		}
   441  
   442  		// Allow inlining markers to be speculatively executed
   443  		// even though they have a memory argument.
   444  		// See issue #74915.
   445  		if v.Op != OpInlMark && v.MemoryArg() != nil {
   446  			return false
   447  		}
   448  	}
   449  	return true
   450  }
   451  
   452  func isDivMod(op Op) bool {
   453  	switch op {
   454  	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
   455  		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
   456  		OpDiv32F, OpDiv64F,
   457  		OpMod8, OpMod8u, OpMod16, OpMod16u,
   458  		OpMod32, OpMod32u, OpMod64, OpMod64u:
   459  		return true
   460  	default:
   461  		return false
   462  	}
   463  }
   464  
   465  func isPtrArithmetic(op Op) bool {
   466  	// Pointer arithmetic can't be speculatively executed because the result
   467  	// may be an invalid pointer (if, for example, the condition is that the
   468  	// base pointer is not nil). See issue 56990.
   469  	switch op {
   470  	case OpOffPtr, OpAddPtr, OpSubPtr:
   471  		return true
   472  	default:
   473  		return false
   474  	}
   475  }
   476  

View as plain text