Source file src/cmd/compile/internal/ssa/fuse.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/internal/src"
     9  	"fmt"
    10  )
    11  
    12  // fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck).
    13  func fuseEarly(f *Func) {
    14  	fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeSingleBitDifference|fuseTypeNanCheck)
    15  }
    16  
    17  // fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
    18  func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
    19  
    20  type fuseType uint8
    21  
    22  const (
    23  	fuseTypePlain fuseType = 1 << iota
    24  	fuseTypeIf
    25  	fuseTypeIntInRange
    26  	fuseTypeSingleBitDifference
    27  	fuseTypeNanCheck
    28  	fuseTypeBranchRedirect
    29  	fuseTypeShortCircuit
    30  )
    31  
    32  // fuse simplifies control flow by joining basic blocks.
    33  func fuse(f *Func, typ fuseType) {
    34  	for changed := true; changed; {
    35  		changed = false
    36  		// Be sure to avoid quadratic behavior in fuseBlockPlain. See issue 13554.
    37  		// Previously this was dealt with using backwards iteration, now fuseBlockPlain
    38  		// handles large runs of blocks.
    39  		for i := len(f.Blocks) - 1; i >= 0; i-- {
    40  			b := f.Blocks[i]
    41  			if typ&fuseTypeIf != 0 {
    42  				changed = fuseBlockIf(b) || changed
    43  			}
    44  			if typ&fuseTypeIntInRange != 0 {
    45  				changed = fuseIntInRange(b) || changed
    46  			}
    47  			if typ&fuseTypeSingleBitDifference != 0 {
    48  				changed = fuseSingleBitDifference(b) || changed
    49  			}
    50  			if typ&fuseTypeNanCheck != 0 {
    51  				changed = fuseNanCheck(b) || changed
    52  			}
    53  			if typ&fuseTypePlain != 0 {
    54  				changed = fuseBlockPlain(b) || changed
    55  			}
    56  			if typ&fuseTypeShortCircuit != 0 {
    57  				changed = shortcircuitBlock(b) || changed
    58  			}
    59  		}
    60  
    61  		if typ&fuseTypeBranchRedirect != 0 {
    62  			changed = fuseBranchRedirect(f) || changed
    63  		}
    64  		if changed {
    65  			f.invalidateCFG()
    66  		}
    67  	}
    68  }
    69  
    70  // fuseBlockIf handles the following cases where s0 and s1 are empty blocks.
    71  //
    72  //	   b        b           b       b
    73  //	\ / \ /    | \  /    \ / |     | |
    74  //	 s0  s1    |  s1      s0 |     | |
    75  //	  \ /      | /         \ |     | |
    76  //	   ss      ss           ss      ss
    77  //
    78  // If all Phi ops in ss have identical variables for slots corresponding to
    79  // s0, s1 and b then the branch can be dropped.
    80  // This optimization often comes up in switch statements with multiple
    81  // expressions in a case clause:
    82  //
    83  //	switch n {
    84  //	  case 1,2,3: return 4
    85  //	}
    86  //
    87  // TODO: If ss doesn't contain any OpPhis, are s0 and s1 dead code anyway.
    88  func fuseBlockIf(b *Block) bool {
    89  	if b.Kind != BlockIf {
    90  		return false
    91  	}
    92  	// It doesn't matter how much Preds does s0 or s1 have.
    93  	var ss0, ss1 *Block
    94  	s0 := b.Succs[0].b
    95  	i0 := b.Succs[0].i
    96  	if s0.Kind != BlockPlain || !isEmpty(s0) {
    97  		s0, ss0 = b, s0
    98  	} else {
    99  		ss0 = s0.Succs[0].b
   100  		i0 = s0.Succs[0].i
   101  	}
   102  	s1 := b.Succs[1].b
   103  	i1 := b.Succs[1].i
   104  	if s1.Kind != BlockPlain || !isEmpty(s1) {
   105  		s1, ss1 = b, s1
   106  	} else {
   107  		ss1 = s1.Succs[0].b
   108  		i1 = s1.Succs[0].i
   109  	}
   110  	if ss0 != ss1 {
   111  		if s0.Kind == BlockPlain && isEmpty(s0) && s1.Kind == BlockPlain && isEmpty(s1) {
   112  			// Two special cases where both s0, s1 and ss are empty blocks.
   113  			if s0 == ss1 {
   114  				s0, ss0 = b, ss1
   115  			} else if ss0 == s1 {
   116  				s1, ss1 = b, ss0
   117  			} else {
   118  				return false
   119  			}
   120  		} else {
   121  			return false
   122  		}
   123  	}
   124  	ss := ss0
   125  
   126  	// s0 and s1 are equal with b if the corresponding block is missing
   127  	// (2nd, 3rd and 4th case in the figure).
   128  
   129  	for _, v := range ss.Values {
   130  		if v.Op == OpPhi && v.Uses > 0 && v.Args[i0] != v.Args[i1] {
   131  			return false
   132  		}
   133  	}
   134  
   135  	// We do not need to redirect the Preds of s0 and s1 to ss,
   136  	// the following optimization will do this.
   137  	b.removeEdge(0)
   138  	if s0 != b && len(s0.Preds) == 0 {
   139  		s0.removeEdge(0)
   140  		// Move any (dead) values in s0 to b,
   141  		// where they will be eliminated by the next deadcode pass.
   142  		for _, v := range s0.Values {
   143  			v.Block = b
   144  		}
   145  		b.Values = append(b.Values, s0.Values...)
   146  		// Clear s0.
   147  		s0.Kind = BlockInvalid
   148  		s0.Values = nil
   149  		s0.Succs = nil
   150  		s0.Preds = nil
   151  	}
   152  
   153  	b.Kind = BlockPlain
   154  	b.Likely = BranchUnknown
   155  	b.ResetControls()
   156  	// The values in b may be dead codes, and clearing them in time may
   157  	// obtain new optimization opportunities.
   158  	// First put dead values that can be deleted into a slice walkValues.
   159  	// Then put their arguments in walkValues before resetting the dead values
   160  	// in walkValues, because the arguments may also become dead values.
   161  	walkValues := []*Value{}
   162  	for _, v := range b.Values {
   163  		if v.Uses == 0 && v.removeable() {
   164  			walkValues = append(walkValues, v)
   165  		}
   166  	}
   167  	for len(walkValues) != 0 {
   168  		v := walkValues[len(walkValues)-1]
   169  		walkValues = walkValues[:len(walkValues)-1]
   170  		if v.Uses == 0 && v.removeable() {
   171  			walkValues = append(walkValues, v.Args...)
   172  			v.reset(OpInvalid)
   173  		}
   174  	}
   175  	return true
   176  }
   177  
   178  // isEmpty reports whether b contains any live values.
   179  // There may be false positives.
   180  func isEmpty(b *Block) bool {
   181  	for _, v := range b.Values {
   182  		if v.Uses > 0 || v.Op.IsCall() || v.Op.HasSideEffects() || v.Type.IsVoid() || opcodeTable[v.Op].nilCheck {
   183  			return false
   184  		}
   185  	}
   186  	return true
   187  }
   188  
   189  // fuseBlockPlain handles a run of blocks with length >= 2,
   190  // whose interior has single predecessors and successors,
   191  // b must be BlockPlain, allowing it to be any node except the
   192  // last (multiple successors means not BlockPlain).
   193  // Cycles are handled and merged into b's successor.
   194  func fuseBlockPlain(b *Block) bool {
   195  	if b.Kind != BlockPlain {
   196  		return false
   197  	}
   198  
   199  	c := b.Succs[0].b
   200  	if len(c.Preds) != 1 || c == b { // At least 2 distinct blocks.
   201  		return false
   202  	}
   203  
   204  	// find earliest block in run.  Avoid simple cycles.
   205  	for len(b.Preds) == 1 && b.Preds[0].b != c && b.Preds[0].b.Kind == BlockPlain {
   206  		b = b.Preds[0].b
   207  	}
   208  
   209  	// find latest block in run.  Still beware of simple cycles.
   210  	for {
   211  		if c.Kind != BlockPlain {
   212  			break
   213  		} // Has exactly 1 successor
   214  		cNext := c.Succs[0].b
   215  		if cNext == b {
   216  			break
   217  		} // not a cycle
   218  		if len(cNext.Preds) != 1 {
   219  			break
   220  		} // no other incoming edge
   221  		c = cNext
   222  	}
   223  
   224  	// Try to preserve any statement marks on the ends of blocks; move values to C
   225  	var b_next *Block
   226  	for bx := b; bx != c; bx = b_next {
   227  		// For each bx with an end-of-block statement marker,
   228  		// try to move it to a value in the next block,
   229  		// or to the next block's end, if possible.
   230  		b_next = bx.Succs[0].b
   231  		if bx.Pos.IsStmt() == src.PosIsStmt {
   232  			l := bx.Pos.Line() // looking for another place to mark for line l
   233  			outOfOrder := false
   234  			for _, v := range b_next.Values {
   235  				if v.Pos.IsStmt() == src.PosNotStmt {
   236  					continue
   237  				}
   238  				if l == v.Pos.Line() { // Found a Value with same line, therefore done.
   239  					v.Pos = v.Pos.WithIsStmt()
   240  					l = 0
   241  					break
   242  				}
   243  				if l < v.Pos.Line() {
   244  					// The order of values in a block is not specified so OOO in a block is not interesting,
   245  					// but they do all come before the end of the block, so this disqualifies attaching to end of b_next.
   246  					outOfOrder = true
   247  				}
   248  			}
   249  			if l != 0 && !outOfOrder && (b_next.Pos.Line() == l || b_next.Pos.IsStmt() != src.PosIsStmt) {
   250  				b_next.Pos = bx.Pos.WithIsStmt()
   251  			}
   252  		}
   253  		// move all of bx's values to c (note containing loop excludes c)
   254  		for _, v := range bx.Values {
   255  			v.Block = c
   256  		}
   257  	}
   258  
   259  	// Compute the total number of values and find the largest value slice in the run, to maximize chance of storage reuse.
   260  	total := 0
   261  	totalBeforeMax := 0 // number of elements preceding the maximum block (i.e. its position in the result).
   262  	max_b := b          // block with maximum capacity
   263  
   264  	for bx := b; ; bx = bx.Succs[0].b {
   265  		if cap(bx.Values) > cap(max_b.Values) {
   266  			totalBeforeMax = total
   267  			max_b = bx
   268  		}
   269  		total += len(bx.Values)
   270  		if bx == c {
   271  			break
   272  		}
   273  	}
   274  
   275  	// Use c's storage if fused blocks will fit, else use the max if that will fit, else allocate new storage.
   276  
   277  	// Take care to avoid c.Values pointing to b.valstorage.
   278  	// See golang.org/issue/18602.
   279  
   280  	// It's important to keep the elements in the same order; maintenance of
   281  	// debugging information depends on the order of *Values in Blocks.
   282  	// This can also cause changes in the order (which may affect other
   283  	// optimizations and possibly compiler output) for 32-vs-64 bit compilation
   284  	// platforms (word size affects allocation bucket size affects slice capacity).
   285  
   286  	// figure out what slice will hold the values,
   287  	// preposition the destination elements if not allocating new storage
   288  	var t []*Value
   289  	if total <= len(c.valstorage) {
   290  		t = c.valstorage[:total]
   291  		max_b = c
   292  		totalBeforeMax = total - len(c.Values)
   293  		copy(t[totalBeforeMax:], c.Values)
   294  	} else if total <= cap(max_b.Values) { // in place, somewhere
   295  		t = max_b.Values[0:total]
   296  		copy(t[totalBeforeMax:], max_b.Values)
   297  	} else {
   298  		t = make([]*Value, total)
   299  		max_b = nil
   300  	}
   301  
   302  	// copy the values
   303  	copyTo := 0
   304  	for bx := b; ; bx = bx.Succs[0].b {
   305  		if bx != max_b {
   306  			copy(t[copyTo:], bx.Values)
   307  		} else if copyTo != totalBeforeMax { // trust but verify.
   308  			panic(fmt.Errorf("totalBeforeMax (%d) != copyTo (%d), max_b=%v, b=%v, c=%v", totalBeforeMax, copyTo, max_b, b, c))
   309  		}
   310  		if bx == c {
   311  			break
   312  		}
   313  		copyTo += len(bx.Values)
   314  	}
   315  	c.Values = t
   316  
   317  	// replace b->c edge with preds(b) -> c
   318  	c.predstorage[0] = Edge{}
   319  	if len(b.Preds) > len(b.predstorage) {
   320  		c.Preds = b.Preds
   321  	} else {
   322  		c.Preds = append(c.predstorage[:0], b.Preds...)
   323  	}
   324  	for i, e := range c.Preds {
   325  		p := e.b
   326  		p.Succs[e.i] = Edge{c, i}
   327  	}
   328  	f := b.Func
   329  	if f.Entry == b {
   330  		f.Entry = c
   331  	}
   332  
   333  	// trash b's fields, just in case
   334  	for bx := b; bx != c; bx = b_next {
   335  		b_next = bx.Succs[0].b
   336  
   337  		bx.Kind = BlockInvalid
   338  		bx.Values = nil
   339  		bx.Preds = nil
   340  		bx.Succs = nil
   341  	}
   342  	return true
   343  }
   344  

View as plain text