453 lines
14 KiB
Go
453 lines
14 KiB
Go
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package pointer
|
||
|
|
||
|
// This file defines the main datatypes and Analyze function of the pointer analysis.
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"go/token"
|
||
|
"go/types"
|
||
|
"io"
|
||
|
"os"
|
||
|
"reflect"
|
||
|
"runtime"
|
||
|
"runtime/debug"
|
||
|
"sort"
|
||
|
|
||
|
"golang.org/x/tools/go/callgraph"
|
||
|
"golang.org/x/tools/go/ssa"
|
||
|
"golang.org/x/tools/go/types/typeutil"
|
||
|
)
|
||
|
|
||
|
const (
|
||
|
// optimization options; enable all when committing
|
||
|
optRenumber = true // enable renumbering optimization (makes logs hard to read)
|
||
|
optHVN = true // enable pointer equivalence via Hash-Value Numbering
|
||
|
|
||
|
// debugging options; disable all when committing
|
||
|
debugHVN = false // enable assertions in HVN
|
||
|
debugHVNVerbose = false // enable extra HVN logging
|
||
|
debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
|
||
|
debugTimers = false // show running time of each phase
|
||
|
)
|
||
|
|
||
|
// object.flags bitmask values.
|
||
|
const (
|
||
|
otTagged = 1 << iota // type-tagged object
|
||
|
otIndirect // type-tagged object with indirect payload
|
||
|
otFunction // function object
|
||
|
)
|
||
|
|
||
|
// An object represents a contiguous block of memory to which some
|
||
|
// (generalized) pointer may point.
|
||
|
//
|
||
|
// (Note: most variables called 'obj' are not *objects but nodeids
|
||
|
// such that a.nodes[obj].obj != nil.)
|
||
|
//
|
||
|
type object struct {
|
||
|
// flags is a bitset of the node type (ot*) flags defined above.
|
||
|
flags uint32
|
||
|
|
||
|
// Number of following nodes belonging to the same "object"
|
||
|
// allocation. Zero for all other nodes.
|
||
|
size uint32
|
||
|
|
||
|
// data describes this object; it has one of these types:
|
||
|
//
|
||
|
// ssa.Value for an object allocated by an SSA operation.
|
||
|
// types.Type for an rtype instance object or *rtype-tagged object.
|
||
|
// string for an instrinsic object, e.g. the array behind os.Args.
|
||
|
// nil for an object allocated by an instrinsic.
|
||
|
// (cgn provides the identity of the intrinsic.)
|
||
|
data interface{}
|
||
|
|
||
|
// The call-graph node (=context) in which this object was allocated.
|
||
|
// May be nil for global objects: Global, Const, some Functions.
|
||
|
cgn *cgnode
|
||
|
}
|
||
|
|
||
|
// nodeid denotes a node.
|
||
|
// It is an index within analysis.nodes.
|
||
|
// We use small integers, not *node pointers, for many reasons:
|
||
|
// - they are smaller on 64-bit systems.
|
||
|
// - sets of them can be represented compactly in bitvectors or BDDs.
|
||
|
// - order matters; a field offset can be computed by simple addition.
|
||
|
type nodeid uint32
|
||
|
|
||
|
// A node is an equivalence class of memory locations.
|
||
|
// Nodes may be pointers, pointed-to locations, neither, or both.
|
||
|
//
|
||
|
// Nodes that are pointed-to locations ("labels") have an enclosing
|
||
|
// object (see analysis.enclosingObject).
|
||
|
//
|
||
|
type node struct {
|
||
|
// If non-nil, this node is the start of an object
|
||
|
// (addressable memory location).
|
||
|
// The following obj.size nodes implicitly belong to the object;
|
||
|
// they locate their object by scanning back.
|
||
|
obj *object
|
||
|
|
||
|
// The type of the field denoted by this node. Non-aggregate,
|
||
|
// unless this is an tagged.T node (i.e. the thing
|
||
|
// pointed to by an interface) in which case typ is that type.
|
||
|
typ types.Type
|
||
|
|
||
|
// subelement indicates which directly embedded subelement of
|
||
|
// an object of aggregate type (struct, tuple, array) this is.
|
||
|
subelement *fieldInfo // e.g. ".a.b[*].c"
|
||
|
|
||
|
// Solver state for the canonical node of this pointer-
|
||
|
// equivalence class. Each node is created with its own state
|
||
|
// but they become shared after HVN.
|
||
|
solve *solverState
|
||
|
}
|
||
|
|
||
|
// An analysis instance holds the state of a single pointer analysis problem.
|
||
|
type analysis struct {
|
||
|
config *Config // the client's control/observer interface
|
||
|
prog *ssa.Program // the program being analyzed
|
||
|
log io.Writer // log stream; nil to disable
|
||
|
panicNode nodeid // sink for panic, source for recover
|
||
|
nodes []*node // indexed by nodeid
|
||
|
flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
|
||
|
trackTypes map[types.Type]bool // memoization of shouldTrack()
|
||
|
constraints []constraint // set of constraints
|
||
|
cgnodes []*cgnode // all cgnodes
|
||
|
genq []*cgnode // queue of functions to generate constraints for
|
||
|
intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
|
||
|
globalval map[ssa.Value]nodeid // node for each global ssa.Value
|
||
|
globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
|
||
|
localval map[ssa.Value]nodeid // node for each local ssa.Value
|
||
|
localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
|
||
|
atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
|
||
|
mapValues []nodeid // values of makemap objects (indirect in HVN)
|
||
|
work nodeset // solver's worklist
|
||
|
result *Result // results of the analysis
|
||
|
track track // pointerlike types whose aliasing we track
|
||
|
deltaSpace []int // working space for iterating over PTS deltas
|
||
|
|
||
|
// Reflection & intrinsics:
|
||
|
hasher typeutil.Hasher // cache of type hashes
|
||
|
reflectValueObj types.Object // type symbol for reflect.Value (if present)
|
||
|
reflectValueCall *ssa.Function // (reflect.Value).Call
|
||
|
reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present)
|
||
|
reflectRtypePtr *types.Pointer // *reflect.rtype
|
||
|
reflectType *types.Named // reflect.Type
|
||
|
rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T
|
||
|
reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value
|
||
|
runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
|
||
|
}
|
||
|
|
||
|
// enclosingObj returns the first node of the addressable memory
|
||
|
// object that encloses node id. Panic ensues if that node does not
|
||
|
// belong to any object.
|
||
|
func (a *analysis) enclosingObj(id nodeid) nodeid {
|
||
|
// Find previous node with obj != nil.
|
||
|
for i := id; i >= 0; i-- {
|
||
|
n := a.nodes[i]
|
||
|
if obj := n.obj; obj != nil {
|
||
|
if i+nodeid(obj.size) <= id {
|
||
|
break // out of bounds
|
||
|
}
|
||
|
return i
|
||
|
}
|
||
|
}
|
||
|
panic("node has no enclosing object")
|
||
|
}
|
||
|
|
||
|
// labelFor returns the Label for node id.
|
||
|
// Panic ensues if that node is not addressable.
|
||
|
func (a *analysis) labelFor(id nodeid) *Label {
|
||
|
return &Label{
|
||
|
obj: a.nodes[a.enclosingObj(id)].obj,
|
||
|
subelement: a.nodes[id].subelement,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
|
||
|
msg := fmt.Sprintf(format, args...)
|
||
|
if a.log != nil {
|
||
|
fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
|
||
|
}
|
||
|
a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
|
||
|
}
|
||
|
|
||
|
// computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
|
||
|
func (a *analysis) computeTrackBits() {
|
||
|
if len(a.config.extendedQueries) != 0 {
|
||
|
// TODO(dh): only track the types necessary for the query.
|
||
|
a.track = trackAll
|
||
|
return
|
||
|
}
|
||
|
var queryTypes []types.Type
|
||
|
for v := range a.config.Queries {
|
||
|
queryTypes = append(queryTypes, v.Type())
|
||
|
}
|
||
|
for v := range a.config.IndirectQueries {
|
||
|
queryTypes = append(queryTypes, mustDeref(v.Type()))
|
||
|
}
|
||
|
for _, t := range queryTypes {
|
||
|
switch t.Underlying().(type) {
|
||
|
case *types.Chan:
|
||
|
a.track |= trackChan
|
||
|
case *types.Map:
|
||
|
a.track |= trackMap
|
||
|
case *types.Pointer:
|
||
|
a.track |= trackPtr
|
||
|
case *types.Slice:
|
||
|
a.track |= trackSlice
|
||
|
case *types.Interface:
|
||
|
a.track = trackAll
|
||
|
return
|
||
|
}
|
||
|
if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
|
||
|
a.track = trackAll
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Analyze runs the pointer analysis with the scope and options
|
||
|
// specified by config, and returns the (synthetic) root of the callgraph.
|
||
|
//
|
||
|
// Pointer analysis of a transitively closed well-typed program should
|
||
|
// always succeed. An error can occur only due to an internal bug.
|
||
|
//
|
||
|
func Analyze(config *Config) (result *Result, err error) {
|
||
|
if config.Mains == nil {
|
||
|
return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
|
||
|
}
|
||
|
defer func() {
|
||
|
if p := recover(); p != nil {
|
||
|
err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
|
||
|
fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
|
||
|
debug.PrintStack()
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
a := &analysis{
|
||
|
config: config,
|
||
|
log: config.Log,
|
||
|
prog: config.prog(),
|
||
|
globalval: make(map[ssa.Value]nodeid),
|
||
|
globalobj: make(map[ssa.Value]nodeid),
|
||
|
flattenMemo: make(map[types.Type][]*fieldInfo),
|
||
|
trackTypes: make(map[types.Type]bool),
|
||
|
atFuncs: make(map[*ssa.Function]bool),
|
||
|
hasher: typeutil.MakeHasher(),
|
||
|
intrinsics: make(map[*ssa.Function]intrinsic),
|
||
|
result: &Result{
|
||
|
Queries: make(map[ssa.Value]Pointer),
|
||
|
IndirectQueries: make(map[ssa.Value]Pointer),
|
||
|
},
|
||
|
deltaSpace: make([]int, 0, 100),
|
||
|
}
|
||
|
|
||
|
if false {
|
||
|
a.log = os.Stderr // for debugging crashes; extremely verbose
|
||
|
}
|
||
|
|
||
|
if a.log != nil {
|
||
|
fmt.Fprintln(a.log, "==== Starting analysis")
|
||
|
}
|
||
|
|
||
|
// Pointer analysis requires a complete program for soundness.
|
||
|
// Check to prevent accidental misconfiguration.
|
||
|
for _, pkg := range a.prog.AllPackages() {
|
||
|
// (This only checks that the package scope is complete,
|
||
|
// not that func bodies exist, but it's a good signal.)
|
||
|
if !pkg.Pkg.Complete() {
|
||
|
return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path())
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
|
||
|
rV := reflect.Pkg.Scope().Lookup("Value")
|
||
|
a.reflectValueObj = rV
|
||
|
a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
|
||
|
a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named)
|
||
|
a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype")
|
||
|
a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
|
||
|
|
||
|
// Override flattening of reflect.Value, treating it like a basic type.
|
||
|
tReflectValue := a.reflectValueObj.Type()
|
||
|
a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
|
||
|
|
||
|
// Override shouldTrack of reflect.Value and *reflect.rtype.
|
||
|
// Always track pointers of these types.
|
||
|
a.trackTypes[tReflectValue] = true
|
||
|
a.trackTypes[a.reflectRtypePtr] = true
|
||
|
|
||
|
a.rtypes.SetHasher(a.hasher)
|
||
|
a.reflectZeros.SetHasher(a.hasher)
|
||
|
}
|
||
|
if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
|
||
|
a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
|
||
|
}
|
||
|
a.computeTrackBits()
|
||
|
|
||
|
a.generate()
|
||
|
a.showCounts()
|
||
|
|
||
|
if optRenumber {
|
||
|
a.renumber()
|
||
|
}
|
||
|
|
||
|
N := len(a.nodes) // excludes solver-created nodes
|
||
|
|
||
|
if optHVN {
|
||
|
if debugHVNCrossCheck {
|
||
|
// Cross-check: run the solver once without
|
||
|
// optimization, once with, and compare the
|
||
|
// solutions.
|
||
|
savedConstraints := a.constraints
|
||
|
|
||
|
a.solve()
|
||
|
a.dumpSolution("A.pts", N)
|
||
|
|
||
|
// Restore.
|
||
|
a.constraints = savedConstraints
|
||
|
for _, n := range a.nodes {
|
||
|
n.solve = new(solverState)
|
||
|
}
|
||
|
a.nodes = a.nodes[:N]
|
||
|
|
||
|
// rtypes is effectively part of the solver state.
|
||
|
a.rtypes = typeutil.Map{}
|
||
|
a.rtypes.SetHasher(a.hasher)
|
||
|
}
|
||
|
|
||
|
a.hvn()
|
||
|
}
|
||
|
|
||
|
if debugHVNCrossCheck {
|
||
|
runtime.GC()
|
||
|
runtime.GC()
|
||
|
}
|
||
|
|
||
|
a.solve()
|
||
|
|
||
|
// Compare solutions.
|
||
|
if optHVN && debugHVNCrossCheck {
|
||
|
a.dumpSolution("B.pts", N)
|
||
|
|
||
|
if !diff("A.pts", "B.pts") {
|
||
|
return nil, fmt.Errorf("internal error: optimization changed solution")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Create callgraph.Nodes in deterministic order.
|
||
|
if cg := a.result.CallGraph; cg != nil {
|
||
|
for _, caller := range a.cgnodes {
|
||
|
cg.CreateNode(caller.fn)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Add dynamic edges to call graph.
|
||
|
var space [100]int
|
||
|
for _, caller := range a.cgnodes {
|
||
|
for _, site := range caller.sites {
|
||
|
for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
|
||
|
a.callEdge(caller, site, nodeid(callee))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return a.result, nil
|
||
|
}
|
||
|
|
||
|
// callEdge is called for each edge in the callgraph.
|
||
|
// calleeid is the callee's object node (has otFunction flag).
|
||
|
//
|
||
|
func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
|
||
|
obj := a.nodes[calleeid].obj
|
||
|
if obj.flags&otFunction == 0 {
|
||
|
panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
|
||
|
}
|
||
|
callee := obj.cgn
|
||
|
|
||
|
if cg := a.result.CallGraph; cg != nil {
|
||
|
// TODO(adonovan): opt: I would expect duplicate edges
|
||
|
// (to wrappers) to arise due to the elimination of
|
||
|
// context information, but I haven't observed any.
|
||
|
// Understand this better.
|
||
|
callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
|
||
|
}
|
||
|
|
||
|
if a.log != nil {
|
||
|
fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
|
||
|
}
|
||
|
|
||
|
// Warn about calls to non-intrinsic external functions.
|
||
|
// TODO(adonovan): de-dup these messages.
|
||
|
if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil {
|
||
|
a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
|
||
|
a.warnf(fn.Pos(), " (declared here)")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// dumpSolution writes the PTS solution to the specified file.
|
||
|
//
|
||
|
// It only dumps the nodes that existed before solving. The order in
|
||
|
// which solver-created nodes are created depends on pre-solver
|
||
|
// optimization, so we can't include them in the cross-check.
|
||
|
//
|
||
|
func (a *analysis) dumpSolution(filename string, N int) {
|
||
|
f, err := os.Create(filename)
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
for id, n := range a.nodes[:N] {
|
||
|
if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
var sep string
|
||
|
for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
|
||
|
if l >= N {
|
||
|
break
|
||
|
}
|
||
|
fmt.Fprintf(f, "%s%d", sep, l)
|
||
|
sep = " "
|
||
|
}
|
||
|
fmt.Fprintf(f, "} : %s\n", n.typ)
|
||
|
}
|
||
|
if err := f.Close(); err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// showCounts logs the size of the constraint system. A typical
|
||
|
// optimized distribution is 65% copy, 13% load, 11% addr, 5%
|
||
|
// offsetAddr, 4% store, 2% others.
|
||
|
//
|
||
|
func (a *analysis) showCounts() {
|
||
|
if a.log != nil {
|
||
|
counts := make(map[reflect.Type]int)
|
||
|
for _, c := range a.constraints {
|
||
|
counts[reflect.TypeOf(c)]++
|
||
|
}
|
||
|
fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
|
||
|
var lines []string
|
||
|
for t, n := range counts {
|
||
|
line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
|
||
|
lines = append(lines, line)
|
||
|
}
|
||
|
sort.Sort(sort.Reverse(sort.StringSlice(lines)))
|
||
|
for _, line := range lines {
|
||
|
fmt.Fprintf(a.log, "\t%s\n", line)
|
||
|
}
|
||
|
|
||
|
fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
|
||
|
|
||
|
// Show number of pointer equivalence classes.
|
||
|
m := make(map[*solverState]bool)
|
||
|
for _, n := range a.nodes {
|
||
|
m[n.solve] = true
|
||
|
}
|
||
|
fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
|
||
|
}
|
||
|
}
|