Diffstat (limited to 'src/cmd/compile/internal/ssa/cse.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/cse.go | 378
1 files changed, 378 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go
new file mode 100644
index 0000000..d649797
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/cse.go
@@ -0,0 +1,378 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"cmd/compile/internal/types"
+	"cmd/internal/src"
+	"fmt"
+	"sort"
+)
+
+// cse does common-subexpression elimination on the Function.
+// Values are just relinked, nothing is deleted. A subsequent deadcode
+// pass is required to actually remove duplicate expressions.
+func cse(f *Func) {
+	// Two values are equivalent if they satisfy the following definition:
+	// equivalent(v, w):
+	//   v.op == w.op
+	//   v.type == w.type
+	//   v.aux == w.aux
+	//   v.auxint == w.auxint
+	//   len(v.args) == len(w.args)
+	//   v.block == w.block if v.op == OpPhi
+	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
+
+	// The algorithm searches for a partition of f's values into
+	// equivalence classes using the above definition.
+	// It starts with a coarse partition and iteratively refines it
+	// until it reaches a fixed point.
+
+	// Make initial coarse partitions by using a subset of the conditions above.
+	a := f.Cache.allocValueSlice(f.NumValues())
+	defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a
+	a = a[:0]
+	if f.auxmap == nil {
+		f.auxmap = auxmap{}
+	}
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Type.IsMemory() {
+				continue // memory values can never cse
+			}
+			if f.auxmap[v.Aux] == 0 {
+				f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
+			}
+			a = append(a, v)
+		}
+	}
+	partition := partitionValues(a, f.auxmap)
+
+	// map from value id back to eqclass id
+	valueEqClass := f.Cache.allocIDSlice(f.NumValues())
+	defer f.Cache.freeIDSlice(valueEqClass)
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			// Use negative equivalence class #s for unique values.
+			valueEqClass[v.ID] = -v.ID
+		}
+	}
+	var pNum ID = 1
+	for _, e := range partition {
+		if f.pass.debug > 1 && len(e) > 500 {
+			fmt.Printf("CSE.large partition (%d): ", len(e))
+			for j := 0; j < 3; j++ {
+				fmt.Printf("%s ", e[j].LongString())
+			}
+			fmt.Println()
+		}
+
+		for _, v := range e {
+			valueEqClass[v.ID] = pNum
+		}
+		if f.pass.debug > 2 && len(e) > 1 {
+			fmt.Printf("CSE.partition #%d:", pNum)
+			for _, v := range e {
+				fmt.Printf(" %s", v.String())
+			}
+			fmt.Printf("\n")
+		}
+		pNum++
+	}
+
+	// Split equivalence classes at points where they have
+	// non-equivalent arguments.  Repeat until we can't find any
+	// more splits.
+	var splitPoints []int
+	byArgClass := new(partitionByArgClass) // reusable partitionByArgClass to reduce allocations
+	for {
+		changed := false
+
+		// partition can grow in the loop. By not using a range loop here,
+		// we process new additions as they arrive, avoiding O(n^2) behavior.
+		for i := 0; i < len(partition); i++ {
+			e := partition[i]
+
+			if opcodeTable[e[0].Op].commutative {
+				// Order the first two args before comparison.
+				for _, v := range e {
+					if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
+						v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
+					}
+				}
+			}
+
+			// Sort by eq class of arguments.
+			byArgClass.a = e
+			byArgClass.eqClass = valueEqClass
+			sort.Sort(byArgClass)
+
+			// Find split points.
+			splitPoints = append(splitPoints[:0], 0)
+			for j := 1; j < len(e); j++ {
+				v, w := e[j-1], e[j]
+				// Note: commutative args already correctly ordered by byArgClass.
+				eqArgs := true
+				for k, a := range v.Args {
+					b := w.Args[k]
+					if valueEqClass[a.ID] != valueEqClass[b.ID] {
+						eqArgs = false
+						break
+					}
+				}
+				if !eqArgs {
+					splitPoints = append(splitPoints, j)
+				}
+			}
+			if len(splitPoints) == 1 {
+				continue // no splits, leave equivalence class alone.
+			}
+
+			// Move another equivalence class down in place of e.
+			partition[i] = partition[len(partition)-1]
+			partition = partition[:len(partition)-1]
+			i--
+
+			// Add new equivalence classes for the parts of e we found.
+			splitPoints = append(splitPoints, len(e))
+			for j := 0; j < len(splitPoints)-1; j++ {
+				f := e[splitPoints[j]:splitPoints[j+1]]
+				if len(f) == 1 {
+					// Don't add singletons.
+					valueEqClass[f[0].ID] = -f[0].ID
+					continue
+				}
+				for _, v := range f {
+					valueEqClass[v.ID] = pNum
+				}
+				pNum++
+				partition = append(partition, f)
+			}
+			changed = true
+		}
+
+		if !changed {
+			break
+		}
+	}
+
+	sdom := f.Sdom()
+
+	// Compute substitutions we would like to do. We substitute v for w
+	// if v and w are in the same equivalence class and v dominates w.
+	rewrite := f.Cache.allocValueSlice(f.NumValues())
+	defer f.Cache.freeValueSlice(rewrite)
+	byDom := new(partitionByDom) // reusable partitionByDom to reduce allocs
+	for _, e := range partition {
+		byDom.a = e
+		byDom.sdom = sdom
+		sort.Sort(byDom)
+		for i := 0; i < len(e)-1; i++ {
+			// e is sorted by domorder, so a maximal dominant element is first in the slice
+			v := e[i]
+			if v == nil {
+				continue
+			}
+
+			e[i] = nil
+			// Replace all elements of e which v dominates
+			for j := i + 1; j < len(e); j++ {
+				w := e[j]
+				if w == nil {
+					continue
+				}
+				if sdom.IsAncestorEq(v.Block, w.Block) {
+					rewrite[w.ID] = v
+					e[j] = nil
+				} else {
+					// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
+					break
+				}
+			}
+		}
+	}
+
+	rewrites := int64(0)
+
+	// Apply substitutions
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			for i, w := range v.Args {
+				if x := rewrite[w.ID]; x != nil {
+					if w.Pos.IsStmt() == src.PosIsStmt {
+						// about to lose a statement marker, w
+						// w is an input to v; if they're in the same block
+						// and the same line, v is a good-enough new statement boundary.
+						if w.Block == v.Block && w.Pos.Line() == v.Pos.Line() {
+							v.Pos = v.Pos.WithIsStmt()
+							w.Pos = w.Pos.WithNotStmt()
+						} // TODO and if this fails?
+					}
+					v.SetArg(i, x)
+					rewrites++
+				}
+			}
+		}
+		for i, v := range b.ControlValues() {
+			if x := rewrite[v.ID]; x != nil {
+				if v.Op == OpNilCheck {
+					// nilcheck pass will remove the nil checks and log
+					// them appropriately, so don't mess with them here.
+					continue
+				}
+				b.ReplaceControl(i, x)
+			}
+		}
+	}
+
+	if f.pass.stats > 0 {
+		f.LogStat("CSE REWRITES", rewrites)
+	}
+}
+
+// An eqclass approximates an equivalence class. During the
+// algorithm it may represent the union of several of the
+// final equivalence classes.
+type eqclass []*Value
+
+// partitionValues partitions the values into equivalence classes
+// based on having all the following features match:
+//   - opcode
+//   - type
+//   - auxint
+//   - aux
+//   - nargs
+//   - block # if a phi op
+//   - first two arg's opcodes and auxint
+//   - NOT first two arg's aux; that can break CSE.
+//
+// partitionValues returns a list of equivalence classes, each
+// being a sorted by ID list of *Values. The eqclass slices are
+// backed by the same storage as the input slice.
+// Equivalence classes of size 1 are ignored.
+func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
+	sort.Sort(sortvalues{a, auxIDs})
+
+	var partition []eqclass
+	for len(a) > 0 {
+		v := a[0]
+		j := 1
+		for ; j < len(a); j++ {
+			w := a[j]
+			if cmpVal(v, w, auxIDs) != types.CMPeq {
+				break
+			}
+		}
+		if j > 1 {
+			partition = append(partition, a[:j])
+		}
+		a = a[j:]
+	}
+
+	return partition
+}
+func lt2Cmp(isLt bool) types.Cmp {
+	if isLt {
+		return types.CMPlt
+	}
+	return types.CMPgt
+}
+
+type auxmap map[Aux]int32
+
+func cmpVal(v, w *Value, auxIDs auxmap) types.Cmp {
+	// Try to order these comparison by cost (cheaper first)
+	if v.Op != w.Op {
+		return lt2Cmp(v.Op < w.Op)
+	}
+	if v.AuxInt != w.AuxInt {
+		return lt2Cmp(v.AuxInt < w.AuxInt)
+	}
+	if len(v.Args) != len(w.Args) {
+		return lt2Cmp(len(v.Args) < len(w.Args))
+	}
+	if v.Op == OpPhi && v.Block != w.Block {
+		return lt2Cmp(v.Block.ID < w.Block.ID)
+	}
+	if v.Type.IsMemory() {
+		// We will never be able to CSE two values
+		// that generate memory.
+		return lt2Cmp(v.ID < w.ID)
+	}
+	// OpSelect is a pseudo-op. We need to be more aggressive
+	// regarding CSE to keep multiple OpSelect's of the same
+	// argument from existing.
+	if v.Op != OpSelect0 && v.Op != OpSelect1 && v.Op != OpSelectN {
+		if tc := v.Type.Compare(w.Type); tc != types.CMPeq {
+			return tc
+		}
+	}
+
+	if v.Aux != w.Aux {
+		if v.Aux == nil {
+			return types.CMPlt
+		}
+		if w.Aux == nil {
+			return types.CMPgt
+		}
+		return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
+	}
+
+	return types.CMPeq
+}
+
+// Sort values to make the initial partition.
+type sortvalues struct {
+	a      []*Value // array of values
+	auxIDs auxmap   // aux -> aux ID map
+}
+
+func (sv sortvalues) Len() int      { return len(sv.a) }
+func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
+func (sv sortvalues) Less(i, j int) bool {
+	v := sv.a[i]
+	w := sv.a[j]
+	if cmp := cmpVal(v, w, sv.auxIDs); cmp != types.CMPeq {
+		return cmp == types.CMPlt
+	}
+
+	// Sort by value ID last to keep the sort result deterministic.
+	return v.ID < w.ID
+}
+
+type partitionByDom struct {
+	a    []*Value // array of values
+	sdom SparseTree
+}
+
+func (sv partitionByDom) Len() int      { return len(sv.a) }
+func (sv partitionByDom) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
+func (sv partitionByDom) Less(i, j int) bool {
+	v := sv.a[i]
+	w := sv.a[j]
+	return sv.sdom.domorder(v.Block) < sv.sdom.domorder(w.Block)
+}
+
+type partitionByArgClass struct {
+	a       []*Value // array of values
+	eqClass []ID     // equivalence class IDs of values
+}
+
+func (sv partitionByArgClass) Len() int      { return len(sv.a) }
+func (sv partitionByArgClass) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
+func (sv partitionByArgClass) Less(i, j int) bool {
+	v := sv.a[i]
+	w := sv.a[j]
+	for i, a := range v.Args {
+		b := w.Args[i]
+		if sv.eqClass[a.ID] < sv.eqClass[b.ID] {
+			return true
+		}
+		if sv.eqClass[a.ID] > sv.eqClass[b.ID] {
+			return false
+		}
+	}
+	return false
+}
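As the doc comment at the top of the file notes, cse only relinks values; the orphaned duplicates are deleted by the subsequent deadcode pass. A hypothetical source-level illustration (not taken from this change) of the redundancy the pass targets:

package p

// f computes x+y twice at the source level. After SSA construction there are
// two equivalent add values; cse rewrites the second multiply to use the
// first add, and the now-unused duplicate is removed later by deadcode.
func f(x, y int) int {
	a := (x + y) * 2
	b := (x + y) * 3
	return a + b
}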
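The split-until-fixed-point loop is easier to follow on a toy input. The sketch below is a minimal, self-contained illustration of the same refinement idea, using invented types and names (val, the class maps) rather than the compiler's own: values start in coarse classes keyed by opcode, auxint, and arity, and each value is then re-keyed by its current class plus its arguments' classes until the class count stops growing.

// refinement_sketch.go: a toy model of cse's partition refinement.
// The types and field names here are invented for illustration and are
// not part of the compiler.
package main

import "fmt"

type val struct {
	id     int
	op     string
	auxInt int64
	args   []int // IDs of argument values
}

func main() {
	vals := []val{
		{id: 1, op: "Const", auxInt: 10},
		{id: 2, op: "Const", auxInt: 20},
		{id: 3, op: "Add", args: []int{1, 2}},
		{id: 4, op: "Add", args: []int{1, 2}}, // same op and args as v3: should merge with it
		{id: 5, op: "Add", args: []int{2, 2}}, // different args: must stay separate
	}

	// Coarse initial partition: class is determined by opcode, auxint, and arity.
	class := map[int]int{}
	initial := map[string]int{}
	for _, v := range vals {
		key := fmt.Sprintf("%s/%d/%d", v.op, v.auxInt, len(v.args))
		if _, ok := initial[key]; !ok {
			initial[key] = len(initial)
		}
		class[v.id] = initial[key]
	}

	// Refine: re-key every value by its own class plus its arguments' classes.
	// The old class is part of the key, so classes can only split; the fixed
	// point is reached when the number of classes stops growing.
	nclasses := len(initial)
	for {
		seen := map[string]int{}
		next := map[int]int{}
		for _, v := range vals {
			key := fmt.Sprint(class[v.id])
			for _, a := range v.args {
				key += fmt.Sprintf(",%d", class[a])
			}
			if _, ok := seen[key]; !ok {
				seen[key] = len(seen)
			}
			next[v.id] = seen[key]
		}
		class = next
		if len(seen) == nclasses {
			break
		}
		nclasses = len(seen)
	}

	// v3 and v4 end up in the same class; v1, v2, and v5 each get their own.
	for _, v := range vals {
		fmt.Printf("v%d -> class %d\n", v.id, class[v.id])
	}
}

Because each new key embeds the old class, classes can only ever split, so an unchanged class count signals the fixed point. This is the analogue of a full sweep of the real pass finding no split points in any equivalence class.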