mirror of https://github.com/tidwall/tile38.git
1064 lines
26 KiB
Go
1064 lines
26 KiB
Go
|
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|||
|
|
|||
|
package ir
|
|||
|
|
|||
|
// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ- and σ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy.  2001.  A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)
//
// C. Scott Ananian. 1997. The static single information form.
//
// Jeremy Singer. 2006. Static program analysis based on virtual register renaming.

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.
|
|||
|
|
|||
|
import (
|
|||
|
"fmt"
|
|||
|
"go/types"
|
|||
|
"os"
|
|||
|
)
|
|||
|
|
|||
|
// debugLifting, if true, makes the lifting pass print diagnostic
// information to os.Stderr at each step. Very verbose.
const debugLifting = false
|
|||
|
|
|||
|
// domFrontier maps each block to the set of blocks in its dominance
|
|||
|
// frontier. The outer slice is conceptually a map keyed by
|
|||
|
// Block.Index. The inner slice is conceptually a set, possibly
|
|||
|
// containing duplicates.
|
|||
|
//
|
|||
|
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
|
|||
|
// representation, e.g. big.Int, and bitwise parallel operations for
|
|||
|
// the union step in the Children loop.
|
|||
|
//
|
|||
|
// domFrontier's methods mutate the slice's elements but not its
|
|||
|
// length, so their receivers needn't be pointers.
|
|||
|
//
|
|||
|
type domFrontier [][]*BasicBlock
|
|||
|
|
|||
|
func (df domFrontier) add(u, v *BasicBlock) {
|
|||
|
df[u.Index] = append(df[u.Index], v)
|
|||
|
}
|
|||
|
|
|||
|
// build builds the dominance frontier df for the dominator tree of
|
|||
|
// fn, using the algorithm found in A Simple, Fast Dominance
|
|||
|
// Algorithm, Figure 5.
|
|||
|
//
|
|||
|
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
|
|||
|
// by pruning the entire IDF computation, rather than merely pruning
|
|||
|
// the DF -> IDF step.
|
|||
|
func (df domFrontier) build(fn *Function) {
|
|||
|
for _, b := range fn.Blocks {
|
|||
|
if len(b.Preds) >= 2 {
|
|||
|
for _, p := range b.Preds {
|
|||
|
runner := p
|
|||
|
for runner != b.dom.idom {
|
|||
|
df.add(runner, b)
|
|||
|
runner = runner.dom.idom
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func buildDomFrontier(fn *Function) domFrontier {
|
|||
|
df := make(domFrontier, len(fn.Blocks))
|
|||
|
df.build(fn)
|
|||
|
return df
|
|||
|
}
|
|||
|
|
|||
|
type postDomFrontier [][]*BasicBlock
|
|||
|
|
|||
|
func (rdf postDomFrontier) add(u, v *BasicBlock) {
|
|||
|
rdf[u.Index] = append(rdf[u.Index], v)
|
|||
|
}
|
|||
|
|
|||
|
func (rdf postDomFrontier) build(fn *Function) {
|
|||
|
for _, b := range fn.Blocks {
|
|||
|
if len(b.Succs) >= 2 {
|
|||
|
for _, s := range b.Succs {
|
|||
|
runner := s
|
|||
|
for runner != b.pdom.idom {
|
|||
|
rdf.add(runner, b)
|
|||
|
runner = runner.pdom.idom
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func buildPostDomFrontier(fn *Function) postDomFrontier {
|
|||
|
rdf := make(postDomFrontier, len(fn.Blocks))
|
|||
|
rdf.build(fn)
|
|||
|
return rdf
|
|||
|
}
|
|||
|
|
|||
|
func removeInstr(refs []Instruction, instr Instruction) []Instruction {
|
|||
|
i := 0
|
|||
|
for _, ref := range refs {
|
|||
|
if ref == instr {
|
|||
|
continue
|
|||
|
}
|
|||
|
refs[i] = ref
|
|||
|
i++
|
|||
|
}
|
|||
|
for j := i; j != len(refs); j++ {
|
|||
|
refs[j] = nil // aid GC
|
|||
|
}
|
|||
|
return refs[:i]
|
|||
|
}
|
|||
|
|
|||
|
func clearInstrs(instrs []Instruction) {
|
|||
|
for i := range instrs {
|
|||
|
instrs[i] = nil
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// lift replaces local and new Allocs accessed only with
|
|||
|
// load/store by IR registers, inserting φ- and σ-nodes where necessary.
|
|||
|
// The result is a program in pruned SSI form.
|
|||
|
//
|
|||
|
// Preconditions:
|
|||
|
// - fn has no dead blocks (blockopt has run).
|
|||
|
// - Def/use info (Operands and Referrers) is up-to-date.
|
|||
|
// - The dominator tree is up-to-date.
|
|||
|
//
|
|||
|
func lift(fn *Function) {
|
|||
|
// TODO(adonovan): opt: lots of little optimizations may be
|
|||
|
// worthwhile here, especially if they cause us to avoid
|
|||
|
// buildDomFrontier. For example:
|
|||
|
//
|
|||
|
// - Alloc never loaded? Eliminate.
|
|||
|
// - Alloc never stored? Replace all loads with a zero constant.
|
|||
|
// - Alloc stored once? Replace loads with dominating store;
|
|||
|
// don't forget that an Alloc is itself an effective store
|
|||
|
// of zero.
|
|||
|
// - Alloc used only within a single block?
|
|||
|
// Use degenerate algorithm avoiding φ-nodes.
|
|||
|
// - Consider synergy with scalar replacement of aggregates (SRA).
|
|||
|
// e.g. *(&x.f) where x is an Alloc.
|
|||
|
// Perhaps we'd get better results if we generated this as x.f
|
|||
|
// i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
|
|||
|
// Unclear.
|
|||
|
//
|
|||
|
// But we will start with the simplest correct code.
|
|||
|
var df domFrontier
|
|||
|
var rdf postDomFrontier
|
|||
|
var closure *closure
|
|||
|
var newPhis newPhiMap
|
|||
|
var newSigmas newSigmaMap
|
|||
|
|
|||
|
// During this pass we will replace some BasicBlock.Instrs
|
|||
|
// (allocs, loads and stores) with nil, keeping a count in
|
|||
|
// BasicBlock.gaps. At the end we will reset Instrs to the
|
|||
|
// concatenation of all non-dead newPhis and non-nil Instrs
|
|||
|
// for the block, reusing the original array if space permits.
|
|||
|
|
|||
|
// While we're here, we also eliminate 'rundefers'
|
|||
|
// instructions in functions that contain no 'defer'
|
|||
|
// instructions.
|
|||
|
usesDefer := false
|
|||
|
|
|||
|
// Determine which allocs we can lift and number them densely.
|
|||
|
// The renaming phase uses this numbering for compact maps.
|
|||
|
numAllocs := 0
|
|||
|
for _, b := range fn.Blocks {
|
|||
|
b.gaps = 0
|
|||
|
b.rundefers = 0
|
|||
|
for _, instr := range b.Instrs {
|
|||
|
switch instr := instr.(type) {
|
|||
|
case *Alloc:
|
|||
|
if !liftable(instr) {
|
|||
|
instr.index = -1
|
|||
|
continue
|
|||
|
}
|
|||
|
index := -1
|
|||
|
if numAllocs == 0 {
|
|||
|
df = buildDomFrontier(fn)
|
|||
|
rdf = buildPostDomFrontier(fn)
|
|||
|
if len(fn.Blocks) > 2 {
|
|||
|
closure = transitiveClosure(fn)
|
|||
|
}
|
|||
|
newPhis = make(newPhiMap, len(fn.Blocks))
|
|||
|
newSigmas = make(newSigmaMap, len(fn.Blocks))
|
|||
|
|
|||
|
if debugLifting {
|
|||
|
title := false
|
|||
|
for i, blocks := range df {
|
|||
|
if blocks != nil {
|
|||
|
if !title {
|
|||
|
fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
|
|||
|
title = true
|
|||
|
}
|
|||
|
fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
liftAlloc(closure, df, rdf, instr, newPhis, newSigmas)
|
|||
|
index = numAllocs
|
|||
|
numAllocs++
|
|||
|
instr.index = index
|
|||
|
case *Defer:
|
|||
|
usesDefer = true
|
|||
|
case *RunDefers:
|
|||
|
b.rundefers++
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if numAllocs > 0 {
|
|||
|
// renaming maps an alloc (keyed by index) to its replacement
|
|||
|
// value. Initially the renaming contains nil, signifying the
|
|||
|
// zero constant of the appropriate type; we construct the
|
|||
|
// Const lazily at most once on each path through the domtree.
|
|||
|
// TODO(adonovan): opt: cache per-function not per subtree.
|
|||
|
renaming := make([]Value, numAllocs)
|
|||
|
|
|||
|
// Renaming.
|
|||
|
rename(fn.Blocks[0], renaming, newPhis, newSigmas)
|
|||
|
|
|||
|
simplifyPhis(newPhis)
|
|||
|
|
|||
|
// Eliminate dead φ- and σ-nodes.
|
|||
|
markLiveNodes(fn.Blocks, newPhis, newSigmas)
|
|||
|
}
|
|||
|
|
|||
|
// Prepend remaining live φ-nodes to each block and possibly kill rundefers.
|
|||
|
for _, b := range fn.Blocks {
|
|||
|
var head []Instruction
|
|||
|
if numAllocs > 0 {
|
|||
|
nps := newPhis[b.Index]
|
|||
|
head = make([]Instruction, 0, len(nps))
|
|||
|
for _, pred := range b.Preds {
|
|||
|
nss := newSigmas[pred.Index]
|
|||
|
idx := pred.succIndex(b)
|
|||
|
for _, newSigma := range nss {
|
|||
|
if sigma := newSigma.sigmas[idx]; sigma != nil && sigma.live {
|
|||
|
head = append(head, sigma)
|
|||
|
|
|||
|
// we didn't populate referrers before, as most
|
|||
|
// sigma nodes will be killed
|
|||
|
if refs := sigma.X.Referrers(); refs != nil {
|
|||
|
*refs = append(*refs, sigma)
|
|||
|
}
|
|||
|
} else if sigma != nil {
|
|||
|
sigma.block = nil
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
for _, np := range nps {
|
|||
|
if np.phi.live {
|
|||
|
head = append(head, np.phi)
|
|||
|
} else {
|
|||
|
for _, edge := range np.phi.Edges {
|
|||
|
if refs := edge.Referrers(); refs != nil {
|
|||
|
*refs = removeInstr(*refs, np.phi)
|
|||
|
}
|
|||
|
}
|
|||
|
np.phi.block = nil
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
rundefersToKill := b.rundefers
|
|||
|
if usesDefer {
|
|||
|
rundefersToKill = 0
|
|||
|
}
|
|||
|
|
|||
|
j := len(head)
|
|||
|
if j+b.gaps+rundefersToKill == 0 {
|
|||
|
continue // fast path: no new phis or gaps
|
|||
|
}
|
|||
|
|
|||
|
// We could do straight copies instead of element-wise copies
|
|||
|
// when both b.gaps and rundefersToKill are zero. However,
|
|||
|
// that seems to only be the case ~1% of the time, which
|
|||
|
// doesn't seem worth the extra branch.
|
|||
|
|
|||
|
// Remove dead instructions, add phis and sigmas
|
|||
|
ns := len(b.Instrs) + j - b.gaps - rundefersToKill
|
|||
|
if ns <= cap(b.Instrs) {
|
|||
|
// b.Instrs has enough capacity to store all instructions
|
|||
|
|
|||
|
// OPT(dh): check cap vs the actually required space; if
|
|||
|
// there is a big enough difference, it may be worth
|
|||
|
// allocating a new slice, to avoid pinning memory.
|
|||
|
dst := b.Instrs[:cap(b.Instrs)]
|
|||
|
i := len(dst) - 1
|
|||
|
for n := len(b.Instrs) - 1; n >= 0; n-- {
|
|||
|
instr := dst[n]
|
|||
|
if instr == nil {
|
|||
|
continue
|
|||
|
}
|
|||
|
if !usesDefer {
|
|||
|
if _, ok := instr.(*RunDefers); ok {
|
|||
|
continue
|
|||
|
}
|
|||
|
}
|
|||
|
dst[i] = instr
|
|||
|
i--
|
|||
|
}
|
|||
|
off := i + 1 - len(head)
|
|||
|
// aid GC
|
|||
|
clearInstrs(dst[:off])
|
|||
|
dst = dst[off:]
|
|||
|
copy(dst, head)
|
|||
|
b.Instrs = dst
|
|||
|
} else {
|
|||
|
// not enough space, so allocate a new slice and copy
|
|||
|
// over.
|
|||
|
dst := make([]Instruction, ns)
|
|||
|
copy(dst, head)
|
|||
|
|
|||
|
for _, instr := range b.Instrs {
|
|||
|
if instr == nil {
|
|||
|
continue
|
|||
|
}
|
|||
|
if !usesDefer {
|
|||
|
if _, ok := instr.(*RunDefers); ok {
|
|||
|
continue
|
|||
|
}
|
|||
|
}
|
|||
|
dst[j] = instr
|
|||
|
j++
|
|||
|
}
|
|||
|
b.Instrs = dst
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Remove any fn.Locals that were lifted.
|
|||
|
j := 0
|
|||
|
for _, l := range fn.Locals {
|
|||
|
if l.index < 0 {
|
|||
|
fn.Locals[j] = l
|
|||
|
j++
|
|||
|
}
|
|||
|
}
|
|||
|
// Nil out fn.Locals[j:] to aid GC.
|
|||
|
for i := j; i < len(fn.Locals); i++ {
|
|||
|
fn.Locals[i] = nil
|
|||
|
}
|
|||
|
fn.Locals = fn.Locals[:j]
|
|||
|
}
|
|||
|
|
|||
|
func hasDirectReferrer(instr Instruction) bool {
|
|||
|
for _, instr := range *instr.Referrers() {
|
|||
|
switch instr.(type) {
|
|||
|
case *Phi, *Sigma:
|
|||
|
// ignore
|
|||
|
default:
|
|||
|
return true
|
|||
|
}
|
|||
|
}
|
|||
|
return false
|
|||
|
}
|
|||
|
|
|||
|
func markLiveNodes(blocks []*BasicBlock, newPhis newPhiMap, newSigmas newSigmaMap) {
|
|||
|
// Phi and sigma nodes are considered live if a non-phi, non-sigma
|
|||
|
// node uses them. Once we find a node that is live, we mark all
|
|||
|
// of its operands as used, too.
|
|||
|
for _, npList := range newPhis {
|
|||
|
for _, np := range npList {
|
|||
|
phi := np.phi
|
|||
|
if !phi.live && hasDirectReferrer(phi) {
|
|||
|
markLivePhi(phi)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
for _, npList := range newSigmas {
|
|||
|
for _, np := range npList {
|
|||
|
for _, sigma := range np.sigmas {
|
|||
|
if sigma != nil && !sigma.live && hasDirectReferrer(sigma) {
|
|||
|
markLiveSigma(sigma)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
// Existing φ-nodes due to && and || operators
|
|||
|
// are all considered live (see Go issue 19622).
|
|||
|
for _, b := range blocks {
|
|||
|
for _, phi := range b.phis() {
|
|||
|
markLivePhi(phi.(*Phi))
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func markLivePhi(phi *Phi) {
|
|||
|
phi.live = true
|
|||
|
for _, rand := range phi.Edges {
|
|||
|
switch rand := rand.(type) {
|
|||
|
case *Phi:
|
|||
|
if !rand.live {
|
|||
|
markLivePhi(rand)
|
|||
|
}
|
|||
|
case *Sigma:
|
|||
|
if !rand.live {
|
|||
|
markLiveSigma(rand)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func markLiveSigma(sigma *Sigma) {
|
|||
|
sigma.live = true
|
|||
|
switch rand := sigma.X.(type) {
|
|||
|
case *Phi:
|
|||
|
if !rand.live {
|
|||
|
markLivePhi(rand)
|
|||
|
}
|
|||
|
case *Sigma:
|
|||
|
if !rand.live {
|
|||
|
markLiveSigma(rand)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// simplifyPhis replaces trivial phis with non-phi alternatives. Phi
|
|||
|
// nodes where all edges are identical, or consist of only the phi
|
|||
|
// itself and one other value, may be replaced with the value.
|
|||
|
func simplifyPhis(newPhis newPhiMap) {
|
|||
|
// find all phis that are trivial and can be replaced with a
|
|||
|
// non-phi value. run until we reach a fixpoint, because replacing
|
|||
|
// a phi may make other phis trivial.
|
|||
|
for changed := true; changed; {
|
|||
|
changed = false
|
|||
|
for _, npList := range newPhis {
|
|||
|
for _, np := range npList {
|
|||
|
if np.phi.live {
|
|||
|
// we're reusing 'live' to mean 'dead' in the context of simplifyPhis
|
|||
|
continue
|
|||
|
}
|
|||
|
if r, ok := isUselessPhi(np.phi); ok {
|
|||
|
// useless phi, replace its uses with the
|
|||
|
// replacement value. the dead phi pass will clean
|
|||
|
// up the phi afterwards.
|
|||
|
replaceAll(np.phi, r)
|
|||
|
np.phi.live = true
|
|||
|
changed = true
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
for _, npList := range newPhis {
|
|||
|
for _, np := range npList {
|
|||
|
np.phi.live = false
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
type BlockSet struct {
|
|||
|
idx int
|
|||
|
values []bool
|
|||
|
count int
|
|||
|
}
|
|||
|
|
|||
|
func NewBlockSet(size int) *BlockSet {
|
|||
|
return &BlockSet{values: make([]bool, size)}
|
|||
|
}
|
|||
|
|
|||
|
func (s *BlockSet) Set(s2 *BlockSet) {
|
|||
|
copy(s.values, s2.values)
|
|||
|
s.count = 0
|
|||
|
for _, v := range s.values {
|
|||
|
if v {
|
|||
|
s.count++
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func (s *BlockSet) Num() int {
|
|||
|
return s.count
|
|||
|
}
|
|||
|
|
|||
|
func (s *BlockSet) Has(b *BasicBlock) bool {
|
|||
|
if b.Index >= len(s.values) {
|
|||
|
return false
|
|||
|
}
|
|||
|
return s.values[b.Index]
|
|||
|
}
|
|||
|
|
|||
|
// add adds b to the set and returns true if the set changed.
|
|||
|
func (s *BlockSet) Add(b *BasicBlock) bool {
|
|||
|
if s.values[b.Index] {
|
|||
|
return false
|
|||
|
}
|
|||
|
s.count++
|
|||
|
s.values[b.Index] = true
|
|||
|
s.idx = b.Index
|
|||
|
|
|||
|
return true
|
|||
|
}
|
|||
|
|
|||
|
func (s *BlockSet) Clear() {
|
|||
|
for j := range s.values {
|
|||
|
s.values[j] = false
|
|||
|
}
|
|||
|
s.count = 0
|
|||
|
}
|
|||
|
|
|||
|
// take removes an arbitrary element from a set s and
|
|||
|
// returns its index, or returns -1 if empty.
|
|||
|
func (s *BlockSet) Take() int {
|
|||
|
// [i, end]
|
|||
|
for i := s.idx; i < len(s.values); i++ {
|
|||
|
if s.values[i] {
|
|||
|
s.values[i] = false
|
|||
|
s.idx = i
|
|||
|
s.count--
|
|||
|
return i
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// [start, i)
|
|||
|
for i := 0; i < s.idx; i++ {
|
|||
|
if s.values[i] {
|
|||
|
s.values[i] = false
|
|||
|
s.idx = i
|
|||
|
s.count--
|
|||
|
return i
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return -1
|
|||
|
}
|
|||
|
|
|||
|
type closure struct {
|
|||
|
span []uint32
|
|||
|
reachables []interval
|
|||
|
}
|
|||
|
|
|||
|
type interval uint32
|
|||
|
|
|||
|
const (
|
|||
|
flagMask = 1 << 31
|
|||
|
numBits = 20
|
|||
|
lengthBits = 32 - numBits - 1
|
|||
|
lengthMask = (1<<lengthBits - 1) << numBits
|
|||
|
numMask = 1<<numBits - 1
|
|||
|
)
|
|||
|
|
|||
|
func (c closure) has(s, v *BasicBlock) bool {
|
|||
|
idx := uint32(v.Index)
|
|||
|
if idx == 1 || s.Dominates(v) {
|
|||
|
return true
|
|||
|
}
|
|||
|
r := c.reachable(s.Index)
|
|||
|
for i := 0; i < len(r); i++ {
|
|||
|
inv := r[i]
|
|||
|
var start, end uint32
|
|||
|
if inv&flagMask == 0 {
|
|||
|
// small interval
|
|||
|
start = uint32(inv & numMask)
|
|||
|
end = start + uint32(inv&lengthMask)>>numBits
|
|||
|
} else {
|
|||
|
// large interval
|
|||
|
i++
|
|||
|
start = uint32(inv & numMask)
|
|||
|
end = uint32(r[i])
|
|||
|
}
|
|||
|
if idx >= start && idx <= end {
|
|||
|
return true
|
|||
|
}
|
|||
|
}
|
|||
|
return false
|
|||
|
}
|
|||
|
|
|||
|
func (c closure) reachable(id int) []interval {
|
|||
|
return c.reachables[c.span[id]:c.span[id+1]]
|
|||
|
}
|
|||
|
|
|||
|
func (c closure) walk(current *BasicBlock, b *BasicBlock, visited []bool) {
|
|||
|
visited[b.Index] = true
|
|||
|
for _, succ := range b.Succs {
|
|||
|
if visited[succ.Index] {
|
|||
|
continue
|
|||
|
}
|
|||
|
visited[succ.Index] = true
|
|||
|
c.walk(current, succ, visited)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func transitiveClosure(fn *Function) *closure {
|
|||
|
reachable := make([]bool, len(fn.Blocks))
|
|||
|
c := &closure{}
|
|||
|
c.span = make([]uint32, len(fn.Blocks)+1)
|
|||
|
|
|||
|
addInterval := func(start, end uint32) {
|
|||
|
if l := end - start; l <= 1<<lengthBits-1 {
|
|||
|
n := interval(l<<numBits | start)
|
|||
|
c.reachables = append(c.reachables, n)
|
|||
|
} else {
|
|||
|
n1 := interval(1<<31 | start)
|
|||
|
n2 := interval(end)
|
|||
|
c.reachables = append(c.reachables, n1, n2)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
for i, b := range fn.Blocks[1:] {
|
|||
|
for i := range reachable {
|
|||
|
reachable[i] = false
|
|||
|
}
|
|||
|
|
|||
|
c.walk(b, b, reachable)
|
|||
|
start := ^uint32(0)
|
|||
|
for id, isReachable := range reachable {
|
|||
|
if !isReachable {
|
|||
|
if start != ^uint32(0) {
|
|||
|
end := uint32(id) - 1
|
|||
|
addInterval(start, end)
|
|||
|
start = ^uint32(0)
|
|||
|
}
|
|||
|
continue
|
|||
|
} else if start == ^uint32(0) {
|
|||
|
start = uint32(id)
|
|||
|
}
|
|||
|
}
|
|||
|
if start != ^uint32(0) {
|
|||
|
addInterval(start, uint32(len(reachable))-1)
|
|||
|
}
|
|||
|
|
|||
|
c.span[i+2] = uint32(len(c.reachables))
|
|||
|
}
|
|||
|
|
|||
|
return c
|
|||
|
}
|
|||
|
|
|||
|
// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
|
|||
|
// it replaces.
|
|||
|
type newPhi struct {
|
|||
|
phi *Phi
|
|||
|
alloc *Alloc
|
|||
|
}
|
|||
|
|
|||
|
type newSigma struct {
|
|||
|
alloc *Alloc
|
|||
|
sigmas []*Sigma
|
|||
|
}
|
|||
|
|
|||
|
// newPhiMap records for each basic block, the set of newPhis that
|
|||
|
// must be prepended to the block.
|
|||
|
type newPhiMap [][]newPhi
|
|||
|
type newSigmaMap [][]newSigma
|
|||
|
|
|||
|
func liftable(alloc *Alloc) bool {
|
|||
|
// Don't lift aggregates into registers, because we don't have
|
|||
|
// a way to express their zero-constants.
|
|||
|
switch deref(alloc.Type()).Underlying().(type) {
|
|||
|
case *types.Array, *types.Struct:
|
|||
|
return false
|
|||
|
}
|
|||
|
|
|||
|
fn := alloc.Parent()
|
|||
|
// Don't lift named return values in functions that defer
|
|||
|
// calls that may recover from panic.
|
|||
|
if fn.hasDefer {
|
|||
|
for _, nr := range fn.namedResults {
|
|||
|
if nr == alloc {
|
|||
|
return false
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
for _, instr := range *alloc.Referrers() {
|
|||
|
switch instr := instr.(type) {
|
|||
|
case *Store:
|
|||
|
if instr.Val == alloc {
|
|||
|
return false // address used as value
|
|||
|
}
|
|||
|
if instr.Addr != alloc {
|
|||
|
panic("Alloc.Referrers is inconsistent")
|
|||
|
}
|
|||
|
case *Load:
|
|||
|
if instr.X != alloc {
|
|||
|
panic("Alloc.Referrers is inconsistent")
|
|||
|
}
|
|||
|
|
|||
|
case *DebugRef:
|
|||
|
// ok
|
|||
|
default:
|
|||
|
return false
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return true
|
|||
|
}
|
|||
|
|
|||
|
// liftAlloc determines whether alloc can be lifted into registers,
|
|||
|
// and if so, it populates newPhis with all the φ-nodes it may require
|
|||
|
// and returns true.
|
|||
|
func liftAlloc(closure *closure, df domFrontier, rdf postDomFrontier, alloc *Alloc, newPhis newPhiMap, newSigmas newSigmaMap) {
|
|||
|
fn := alloc.Parent()
|
|||
|
|
|||
|
defblocks := fn.blockset(0)
|
|||
|
useblocks := fn.blockset(1)
|
|||
|
Aphi := fn.blockset(2)
|
|||
|
Asigma := fn.blockset(3)
|
|||
|
W := fn.blockset(4)
|
|||
|
|
|||
|
// Compute defblocks, the set of blocks containing a
|
|||
|
// definition of the alloc cell.
|
|||
|
for _, instr := range *alloc.Referrers() {
|
|||
|
// Bail out if we discover the alloc is not liftable;
|
|||
|
// the only operations permitted to use the alloc are
|
|||
|
// loads/stores into the cell, and DebugRef.
|
|||
|
switch instr := instr.(type) {
|
|||
|
case *Store:
|
|||
|
defblocks.Add(instr.Block())
|
|||
|
case *Load:
|
|||
|
useblocks.Add(instr.Block())
|
|||
|
for _, ref := range *instr.Referrers() {
|
|||
|
useblocks.Add(ref.Block())
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
// The Alloc itself counts as a (zero) definition of the cell.
|
|||
|
defblocks.Add(alloc.Block())
|
|||
|
|
|||
|
if debugLifting {
|
|||
|
fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
|
|||
|
}
|
|||
|
|
|||
|
// Φ-insertion.
|
|||
|
//
|
|||
|
// What follows is the body of the main loop of the insert-φ
|
|||
|
// function described by Cytron et al, but instead of using
|
|||
|
// counter tricks, we just reset the 'hasAlready' and 'work'
|
|||
|
// sets each iteration. These are bitmaps so it's pretty cheap.
|
|||
|
|
|||
|
// Initialize W and work to defblocks.
|
|||
|
|
|||
|
for change := true; change; {
|
|||
|
change = false
|
|||
|
{
|
|||
|
// Traverse iterated dominance frontier, inserting φ-nodes.
|
|||
|
W.Set(defblocks)
|
|||
|
|
|||
|
for i := W.Take(); i != -1; i = W.Take() {
|
|||
|
n := fn.Blocks[i]
|
|||
|
for _, y := range df[n.Index] {
|
|||
|
if Aphi.Add(y) {
|
|||
|
if len(*alloc.Referrers()) == 0 {
|
|||
|
continue
|
|||
|
}
|
|||
|
live := false
|
|||
|
if closure == nil {
|
|||
|
live = true
|
|||
|
} else {
|
|||
|
for _, ref := range *alloc.Referrers() {
|
|||
|
if _, ok := ref.(*Load); ok {
|
|||
|
if closure.has(y, ref.Block()) {
|
|||
|
live = true
|
|||
|
break
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if !live {
|
|||
|
continue
|
|||
|
}
|
|||
|
|
|||
|
// Create φ-node.
|
|||
|
// It will be prepended to v.Instrs later, if needed.
|
|||
|
phi := &Phi{
|
|||
|
Edges: make([]Value, len(y.Preds)),
|
|||
|
}
|
|||
|
|
|||
|
phi.source = alloc.source
|
|||
|
phi.setType(deref(alloc.Type()))
|
|||
|
phi.block = y
|
|||
|
if debugLifting {
|
|||
|
fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, y)
|
|||
|
}
|
|||
|
newPhis[y.Index] = append(newPhis[y.Index], newPhi{phi, alloc})
|
|||
|
|
|||
|
for _, p := range y.Preds {
|
|||
|
useblocks.Add(p)
|
|||
|
}
|
|||
|
change = true
|
|||
|
if defblocks.Add(y) {
|
|||
|
W.Add(y)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
{
|
|||
|
W.Set(useblocks)
|
|||
|
for i := W.Take(); i != -1; i = W.Take() {
|
|||
|
n := fn.Blocks[i]
|
|||
|
for _, y := range rdf[n.Index] {
|
|||
|
if Asigma.Add(y) {
|
|||
|
sigmas := make([]*Sigma, 0, len(y.Succs))
|
|||
|
anyLive := false
|
|||
|
for _, succ := range y.Succs {
|
|||
|
live := false
|
|||
|
for _, ref := range *alloc.Referrers() {
|
|||
|
if closure == nil || closure.has(succ, ref.Block()) {
|
|||
|
live = true
|
|||
|
anyLive = true
|
|||
|
break
|
|||
|
}
|
|||
|
}
|
|||
|
if live {
|
|||
|
sigma := &Sigma{
|
|||
|
From: y,
|
|||
|
X: alloc,
|
|||
|
}
|
|||
|
sigma.source = alloc.source
|
|||
|
sigma.setType(deref(alloc.Type()))
|
|||
|
sigma.block = succ
|
|||
|
sigmas = append(sigmas, sigma)
|
|||
|
} else {
|
|||
|
sigmas = append(sigmas, nil)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if anyLive {
|
|||
|
newSigmas[y.Index] = append(newSigmas[y.Index], newSigma{alloc, sigmas})
|
|||
|
for _, s := range y.Succs {
|
|||
|
defblocks.Add(s)
|
|||
|
}
|
|||
|
change = true
|
|||
|
if useblocks.Add(y) {
|
|||
|
W.Add(y)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// replaceAll replaces all intraprocedural uses of x with y,
|
|||
|
// updating x.Referrers and y.Referrers.
|
|||
|
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
|
|||
|
//
|
|||
|
func replaceAll(x, y Value) {
|
|||
|
var rands []*Value
|
|||
|
pxrefs := x.Referrers()
|
|||
|
pyrefs := y.Referrers()
|
|||
|
for _, instr := range *pxrefs {
|
|||
|
rands = instr.Operands(rands[:0]) // recycle storage
|
|||
|
for _, rand := range rands {
|
|||
|
if *rand != nil {
|
|||
|
if *rand == x {
|
|||
|
*rand = y
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if pyrefs != nil {
|
|||
|
*pyrefs = append(*pyrefs, instr) // dups ok
|
|||
|
}
|
|||
|
}
|
|||
|
*pxrefs = nil // x is now unreferenced
|
|||
|
}
|
|||
|
|
|||
|
// renamed returns the value to which alloc is being renamed,
|
|||
|
// constructing it lazily if it's the implicit zero initialization.
|
|||
|
//
|
|||
|
func renamed(fn *Function, renaming []Value, alloc *Alloc) Value {
|
|||
|
v := renaming[alloc.index]
|
|||
|
if v == nil {
|
|||
|
v = emitConst(fn, zeroConst(deref(alloc.Type())))
|
|||
|
renaming[alloc.index] = v
|
|||
|
}
|
|||
|
return v
|
|||
|
}
|
|||
|
|
|||
|
// rename implements the Cytron et al-based SSI renaming algorithm, a
|
|||
|
// preorder traversal of the dominator tree replacing all loads of
|
|||
|
// Alloc cells with the value stored to that cell by the dominating
|
|||
|
// store instruction.
|
|||
|
//
|
|||
|
// renaming is a map from *Alloc (keyed by index number) to its
|
|||
|
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
|
|||
|
// prepended to block x.
|
|||
|
//
|
|||
|
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap, newSigmas newSigmaMap) {
|
|||
|
// Each φ-node becomes the new name for its associated Alloc.
|
|||
|
for _, np := range newPhis[u.Index] {
|
|||
|
phi := np.phi
|
|||
|
alloc := np.alloc
|
|||
|
renaming[alloc.index] = phi
|
|||
|
}
|
|||
|
|
|||
|
// Rename loads and stores of allocs.
|
|||
|
for i, instr := range u.Instrs {
|
|||
|
switch instr := instr.(type) {
|
|||
|
case *Alloc:
|
|||
|
if instr.index >= 0 { // store of zero to Alloc cell
|
|||
|
// Replace dominated loads by the zero value.
|
|||
|
renaming[instr.index] = nil
|
|||
|
if debugLifting {
|
|||
|
fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
|
|||
|
}
|
|||
|
// Delete the Alloc.
|
|||
|
u.Instrs[i] = nil
|
|||
|
u.gaps++
|
|||
|
}
|
|||
|
|
|||
|
case *Store:
|
|||
|
if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
|
|||
|
// Replace dominated loads by the stored value.
|
|||
|
renaming[alloc.index] = instr.Val
|
|||
|
if debugLifting {
|
|||
|
fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
|
|||
|
instr, instr.Val.Name())
|
|||
|
}
|
|||
|
if refs := instr.Addr.Referrers(); refs != nil {
|
|||
|
*refs = removeInstr(*refs, instr)
|
|||
|
}
|
|||
|
if refs := instr.Val.Referrers(); refs != nil {
|
|||
|
*refs = removeInstr(*refs, instr)
|
|||
|
}
|
|||
|
// Delete the Store.
|
|||
|
u.Instrs[i] = nil
|
|||
|
u.gaps++
|
|||
|
}
|
|||
|
|
|||
|
case *Load:
|
|||
|
if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
|
|||
|
// In theory, we wouldn't be able to replace loads
|
|||
|
// directly, because a loaded value could be used in
|
|||
|
// different branches, in which case it should be
|
|||
|
// replaced with different sigma nodes. But we can't
|
|||
|
// simply defer replacement, either, because then
|
|||
|
// later stores might incorrectly affect this load.
|
|||
|
//
|
|||
|
// To avoid doing renaming on _all_ values (instead of
|
|||
|
// just loads and stores like we're doing), we make
|
|||
|
// sure during code generation that each load is only
|
|||
|
// used in one block. For example, in constant switch
|
|||
|
// statements, where the tag is only evaluated once,
|
|||
|
// we store it in a temporary and load it for each
|
|||
|
// comparison, so that we have individual loads to
|
|||
|
// replace.
|
|||
|
newval := renamed(u.Parent(), renaming, alloc)
|
|||
|
if debugLifting {
|
|||
|
fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
|
|||
|
instr.Name(), instr, newval)
|
|||
|
}
|
|||
|
replaceAll(instr, newval)
|
|||
|
u.Instrs[i] = nil
|
|||
|
u.gaps++
|
|||
|
}
|
|||
|
|
|||
|
case *DebugRef:
|
|||
|
if x, ok := instr.X.(*Alloc); ok && x.index >= 0 {
|
|||
|
if instr.IsAddr {
|
|||
|
instr.X = renamed(u.Parent(), renaming, x)
|
|||
|
instr.IsAddr = false
|
|||
|
|
|||
|
// Add DebugRef to instr.X's referrers.
|
|||
|
if refs := instr.X.Referrers(); refs != nil {
|
|||
|
*refs = append(*refs, instr)
|
|||
|
}
|
|||
|
} else {
|
|||
|
// A source expression denotes the address
|
|||
|
// of an Alloc that was optimized away.
|
|||
|
instr.X = nil
|
|||
|
|
|||
|
// Delete the DebugRef.
|
|||
|
u.Instrs[i] = nil
|
|||
|
u.gaps++
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// update all outgoing sigma nodes with the dominating store
|
|||
|
for _, sigmas := range newSigmas[u.Index] {
|
|||
|
for _, sigma := range sigmas.sigmas {
|
|||
|
if sigma == nil {
|
|||
|
continue
|
|||
|
}
|
|||
|
sigma.X = renamed(u.Parent(), renaming, sigmas.alloc)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// For each φ-node in a CFG successor, rename the edge.
|
|||
|
for succi, v := range u.Succs {
|
|||
|
phis := newPhis[v.Index]
|
|||
|
if len(phis) == 0 {
|
|||
|
continue
|
|||
|
}
|
|||
|
i := v.predIndex(u)
|
|||
|
for _, np := range phis {
|
|||
|
phi := np.phi
|
|||
|
alloc := np.alloc
|
|||
|
// if there's a sigma node, use it, else use the dominating value
|
|||
|
var newval Value
|
|||
|
for _, sigmas := range newSigmas[u.Index] {
|
|||
|
if sigmas.alloc == alloc && sigmas.sigmas[succi] != nil {
|
|||
|
newval = sigmas.sigmas[succi]
|
|||
|
break
|
|||
|
}
|
|||
|
}
|
|||
|
if newval == nil {
|
|||
|
newval = renamed(u.Parent(), renaming, alloc)
|
|||
|
}
|
|||
|
if debugLifting {
|
|||
|
fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
|
|||
|
phi.Name(), u, v, i, alloc.Name(), newval.Name())
|
|||
|
}
|
|||
|
phi.Edges[i] = newval
|
|||
|
if prefs := newval.Referrers(); prefs != nil {
|
|||
|
*prefs = append(*prefs, phi)
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Continue depth-first recursion over domtree, pushing a
|
|||
|
// fresh copy of the renaming map for each subtree.
|
|||
|
r := make([]Value, len(renaming))
|
|||
|
for _, v := range u.dom.children {
|
|||
|
// XXX add debugging
|
|||
|
copy(r, renaming)
|
|||
|
|
|||
|
// on entry to a block, the incoming sigma nodes become the new values for their alloc
|
|||
|
if idx := u.succIndex(v); idx != -1 {
|
|||
|
for _, sigma := range newSigmas[u.Index] {
|
|||
|
if sigma.sigmas[idx] != nil {
|
|||
|
r[sigma.alloc.index] = sigma.sigmas[idx]
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
rename(v, r, newPhis, newSigmas)
|
|||
|
}
|
|||
|
|
|||
|
}
|