Merge pull request #8841 from filecoin-project/feat/debug-execution

PoC: FVM Debug Dual Execution
This commit is contained in:
Aayush Rajasekaran 2022-06-29 14:35:43 -04:00 committed by GitHub
commit 709fe5c65e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 291 additions and 57 deletions

View File

@ -113,6 +113,21 @@ func ReadManifest(ctx context.Context, store cbor.IpldStore, mfCid cid.Cid) (map
return metadata, nil
}
// LoadManifest reads the manifest stored under mfCid from adtStore and then
// calls Load on it against the same store to populate its entries data.
// It returns a wrapped error if either the read or the load step fails.
func LoadManifest(ctx context.Context, mfCid cid.Cid, adtStore adt.Store) (*manifest.Manifest, error) {
	loaded := new(manifest.Manifest)

	err := adtStore.Get(ctx, mfCid, loaded)
	if err != nil {
		return nil, xerrors.Errorf("error reading manifest: %w", err)
	}

	err = loaded.Load(ctx, adtStore)
	if err != nil {
		return nil, xerrors.Errorf("error loading manifest entries data: %w", err)
	}

	return loaded, nil
}
// GetActorCodeID looks up a builtin actor's code CID by actor version and canonical actor name.
func GetActorCodeID(av Version, name string) (cid.Cid, bool) {
manifestMx.RLock()

View File

@ -1477,21 +1477,18 @@ func LiteMigration(ctx context.Context, bstore blockstore.Blockstore, newActorsM
return cid.Undef, xerrors.Errorf("failed to load state tree: %w", err)
}
oldManifest, err := stmgr.GetManifest(ctx, st)
oldManifestData, err := stmgr.GetManifestData(ctx, st)
if err != nil {
return cid.Undef, xerrors.Errorf("error loading old actor manifest: %w", err)
}
oldManifestData := manifest.ManifestData{}
if err := store.Get(ctx, oldManifest.Data, &oldManifestData); err != nil {
return cid.Undef, xerrors.Errorf("error loading old manifest data: %w", err)
}
// load new manifest
newManifest := manifest.Manifest{}
if err := store.Get(ctx, newActorsManifestCid, &newManifest); err != nil {
newManifest, err := actors.LoadManifest(ctx, newActorsManifestCid, store)
if err != nil {
return cid.Undef, xerrors.Errorf("error loading new manifest: %w", err)
}
newManifestData := manifest.ManifestData{}
var newManifestData manifest.ManifestData
if err := store.Get(ctx, newManifest.Data, &newManifestData); err != nil {
return cid.Undef, xerrors.Errorf("error loading new manifest data: %w", err)
}
@ -1506,12 +1503,13 @@ func LiteMigration(ctx context.Context, bstore blockstore.Blockstore, newActorsM
// Maps prior version code CIDs to migration functions.
migrations := make(map[cid.Cid]cid.Cid)
for _, entry := range newManifestData.Entries {
oldCodeCid, ok := oldManifest.Get(entry.Name)
for _, entry := range oldManifestData.Entries {
newCodeCid, ok := newManifest.Get(entry.Name)
if !ok {
return cid.Undef, xerrors.Errorf("code cid for %s actor not found in old manifest", entry.Name)
return cid.Undef, xerrors.Errorf("code cid for %s actor not found in new manifest", entry.Name)
}
migrations[oldCodeCid] = entry.Code
migrations[entry.Code] = newCodeCid
}
startTime := time.Now()

View File

@ -487,7 +487,7 @@ func VerifyPreSealedData(ctx context.Context, cs *store.ChainStore, sys vm.Sysca
}
vm, err := vm.NewVM(ctx, &vmopt)
if err != nil {
return cid.Undef, xerrors.Errorf("failed to create NewLegacyVM: %w", err)
return cid.Undef, xerrors.Errorf("failed to create VM: %w", err)
}
for mi, m := range template.Miners {

View File

@ -215,7 +215,7 @@ func (sm *StateManager) ListAllActors(ctx context.Context, ts *types.TipSet) ([]
return out, nil
}
func GetManifest(ctx context.Context, st *state.StateTree) (*manifest.Manifest, error) {
func GetManifestData(ctx context.Context, st *state.StateTree) (*manifest.ManifestData, error) {
wrapStore := gstStore.WrapStore(ctx, st.Store)
systemActor, err := st.GetActor(system.Address)
@ -226,30 +226,13 @@ func GetManifest(ctx context.Context, st *state.StateTree) (*manifest.Manifest,
if err != nil {
return nil, xerrors.Errorf("failed to load system actor state: %w", err)
}
actorsManifestCid := systemActorState.GetBuiltinActors()
mf := manifest.Manifest{
Version: 1,
Data: actorsManifestCid,
}
if err := mf.Load(ctx, wrapStore); err != nil {
return nil, xerrors.Errorf("failed to load actor manifest: %w", err)
}
manifestData := manifest.ManifestData{}
if err := st.Store.Get(ctx, mf.Data, &manifestData); err != nil {
return nil, xerrors.Errorf("failed to load manifest data: %w", err)
}
return &mf, nil
}
actorsManifestDataCid := systemActorState.GetBuiltinActors()
func GetManifestEntries(ctx context.Context, st *state.StateTree) ([]manifest.ManifestEntry, error) {
mf, err := GetManifest(ctx, st)
if err != nil {
return nil, xerrors.Errorf("failed to get manifest: %w", err)
var mfData manifest.ManifestData
if err := wrapStore.Get(ctx, actorsManifestDataCid, &mfData); err != nil {
return nil, xerrors.Errorf("error fetching data: %w", err)
}
manifestData := manifest.ManifestData{}
if err := st.Store.Get(ctx, mf.Data, &manifestData); err != nil {
return nil, xerrors.Errorf("filed to load manifest data: %w", err)
}
return manifestData.Entries, nil
return &mfData, nil
}

View File

@ -4,11 +4,15 @@ import (
"bytes"
"context"
"fmt"
"io"
"os"
"sort"
"sync"
"time"
"github.com/ipfs/go-cid"
cbor "github.com/ipfs/go-ipld-cbor"
cbg "github.com/whyrusleeping/cbor-gen"
"golang.org/x/xerrors"
ffi "github.com/filecoin-project/filecoin-ffi"
@ -27,10 +31,12 @@ import (
"github.com/filecoin-project/lotus/chain/state"
"github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/lib/sigs"
"github.com/filecoin-project/lotus/node/bundle"
)
var _ Interface = (*FVM)(nil)
var _ ffi_cgo.Externs = (*FvmExtern)(nil)
var debugBundleV8path = os.Getenv("LOTUS_FVM_DEBUG_BUNDLE_V8")
type FvmExtern struct {
Rand
@ -250,18 +256,18 @@ type FVM struct {
fvm *ffi.FVM
}
func NewFVM(ctx context.Context, opts *VMOpts) (*FVM, error) {
func defaultFVMOpts(ctx context.Context, opts *VMOpts) (*ffi.FVMOpts, error) {
state, err := state.LoadStateTree(cbor.NewCborStore(opts.Bstore), opts.StateBase)
if err != nil {
return nil, err
return nil, xerrors.Errorf("loading state tree: %w", err)
}
circToReport, err := opts.CircSupplyCalc(ctx, opts.Epoch, state)
if err != nil {
return nil, err
return nil, xerrors.Errorf("calculating circ supply: %w", err)
}
fvmopts := &ffi.FVMOpts{
return &ffi.FVMOpts{
FVMVersion: 0,
Externs: &FvmExtern{
Rand: opts.Rand,
@ -276,6 +282,14 @@ func NewFVM(ctx context.Context, opts *VMOpts) (*FVM, error) {
NetworkVersion: opts.NetworkVersion,
StateBase: opts.StateBase,
Tracing: EnableDetailedTracing,
}, nil
}
func NewFVM(ctx context.Context, opts *VMOpts) (*FVM, error) {
fvmOpts, err := defaultFVMOpts(ctx, opts)
if err != nil {
return nil, xerrors.Errorf("creating fvm opts: %w", err)
}
if os.Getenv("LOTUS_USE_FVM_CUSTOM_BUNDLE") == "1" {
@ -289,10 +303,109 @@ func NewFVM(ctx context.Context, opts *VMOpts) (*FVM, error) {
return nil, xerrors.Errorf("no manifest for custom bundle (actors version %d)", av)
}
fvmopts.Manifest = c
fvmOpts.Manifest = c
}
fvm, err := ffi.CreateFVM(fvmopts)
fvm, err := ffi.CreateFVM(fvmOpts)
if err != nil {
return nil, err
}
return &FVM{
fvm: fvm,
}, nil
}
func NewDebugFVM(ctx context.Context, opts *VMOpts) (*FVM, error) {
baseBstore := opts.Bstore
overlayBstore := blockstore.NewMemorySync()
cborStore := cbor.NewCborStore(overlayBstore)
vmBstore := blockstore.NewTieredBstore(overlayBstore, baseBstore)
opts.Bstore = vmBstore
fvmOpts, err := defaultFVMOpts(ctx, opts)
if err != nil {
return nil, xerrors.Errorf("creating fvm opts: %w", err)
}
fvmOpts.Debug = true
putMapping := func(ar map[cid.Cid]cid.Cid) (cid.Cid, error) {
var mapping xMapping
mapping.redirects = make([]xRedirect, 0, len(ar))
for from, to := range ar {
mapping.redirects = append(mapping.redirects, xRedirect{from: from, to: to})
}
sort.Slice(mapping.redirects, func(i, j int) bool {
return bytes.Compare(mapping.redirects[i].from.Bytes(), mapping.redirects[j].from.Bytes()) < 0
})
// Passing this as a pointer of structs has proven to be an enormous PiTA; hence this code.
mappingCid, err := cborStore.Put(context.TODO(), &mapping)
if err != nil {
return cid.Undef, err
}
return mappingCid, nil
}
createMapping := func(debugBundlePath string) error {
mfCid, err := bundle.LoadBundleFromFile(ctx, overlayBstore, debugBundlePath)
if err != nil {
return xerrors.Errorf("loading debug bundle: %w", err)
}
mf, err := actors.LoadManifest(ctx, mfCid, adt.WrapStore(ctx, cborStore))
if err != nil {
return xerrors.Errorf("loading debug manifest: %w", err)
}
// create actor redirect mapping
actorRedirect := make(map[cid.Cid]cid.Cid)
for _, key := range actors.GetBuiltinActorsKeys() {
from, ok := actors.GetActorCodeID(actors.Version8, key)
if !ok {
log.Warnf("actor missing in the from manifest %s", key)
continue
}
to, ok := mf.Get(key)
if !ok {
log.Warnf("actor missing in the to manifest %s", key)
continue
}
actorRedirect[from] = to
}
if len(actorRedirect) > 0 {
mappingCid, err := putMapping(actorRedirect)
if err != nil {
return xerrors.Errorf("error writing redirect mapping: %w", err)
}
fvmOpts.ActorRedirect = mappingCid
}
return nil
}
av, err := actors.VersionForNetwork(opts.NetworkVersion)
if err != nil {
return nil, xerrors.Errorf("error determining actors version for network version %d: %w", opts.NetworkVersion, err)
}
switch av {
case actors.Version8:
if debugBundleV8path != "" {
if err := createMapping(debugBundleV8path); err != nil {
log.Errorf("failed to create v8 debug mapping")
}
}
}
fvm, err := ffi.CreateFVM(fvmOpts)
if err != nil {
return nil, err
@ -427,3 +540,110 @@ func (vm *FVM) ApplyImplicitMessage(ctx context.Context, cmsg *types.Message) (*
func (vm *FVM) Flush(ctx context.Context) (cid.Cid, error) {
return vm.fvm.Flush()
}
// dualExecutionFVM pairs two FVM instances that are fed the same messages.
// Results returned to callers always come from main; debug is run alongside
// it and its failures are only logged.
type dualExecutionFVM struct {
// main is the FVM whose results and flushed state are returned to callers.
main *FVM
// debug is the FVM created by NewDebugFVM; its results are discarded.
debug *FVM
}
var _ Interface = (*dualExecutionFVM)(nil)
// NewDualExecutionFVM creates a regular FVM and a debug FVM from the same
// opts and bundles them into a single Interface implementation. If either
// constructor fails, its error is returned unchanged.
func NewDualExecutionFVM(ctx context.Context, opts *VMOpts) (Interface, error) {
	// The regular FVM must be built first: NewDebugFVM swaps opts.Bstore for
	// a tiered blockstore, which the regular FVM should not see.
	primary, err := NewFVM(ctx, opts)
	if err != nil {
		return nil, err
	}

	shadow, err := NewDebugFVM(ctx, opts)
	if err != nil {
		return nil, err
	}

	dual := new(dualExecutionFVM)
	dual.main = primary
	dual.debug = shadow
	return dual, nil
}
// ApplyMessage executes cmsg on the main and the debug FVM concurrently and
// waits for both executions to finish. Only the main FVM's result and error
// are returned; a failure of the debug execution is logged and otherwise
// ignored.
func (vm *dualExecutionFVM) ApplyMessage(ctx context.Context, cmsg types.ChainMsg) (ret *ApplyRet, err error) {
	var wg sync.WaitGroup
	wg.Add(2)

	go func() {
		defer wg.Done()
		// This goroutine is the only writer of the named results; they are
		// read only after wg.Wait() returns.
		ret, err = vm.main.ApplyMessage(ctx, cmsg)
	}()

	go func() {
		defer wg.Done()
		// err is shadowed on purpose so the debug outcome never reaches the
		// caller-visible return values.
		if _, err := vm.debug.ApplyMessage(ctx, cmsg); err != nil {
			// Use %s rather than %w: %w is an error-wrapping verb for
			// Errorf-style error constructors, not for log formatting, and
			// ApplyImplicitMessage already logs this failure with %s.
			log.Errorf("debug execution failed: %s", err)
		}
	}()

	wg.Wait()
	return ret, err
}
// ApplyImplicitMessage runs msg as an implicit message on both the main and
// the debug FVM in parallel, blocking until both are done. The caller
// receives the main FVM's result and error; a debug failure is only logged.
func (vm *dualExecutionFVM) ApplyImplicitMessage(ctx context.Context, msg *types.Message) (ret *ApplyRet, err error) {
	var pending sync.WaitGroup
	pending.Add(2)

	// runMain is the only writer of the named results.
	runMain := func() {
		defer pending.Done()
		ret, err = vm.main.ApplyImplicitMessage(ctx, msg)
	}

	// runDebug discards the debug result and keeps its error local.
	runDebug := func() {
		defer pending.Done()
		_, debugErr := vm.debug.ApplyImplicitMessage(ctx, msg)
		if debugErr != nil {
			log.Errorf("debug execution failed: %s", debugErr)
		}
	}

	go runMain()
	go runDebug()

	pending.Wait()
	return ret, err
}
// Flush flushes the main FVM and returns its result. The debug FVM is not
// flushed.
func (vm *dualExecutionFVM) Flush(ctx context.Context) (cid.Cid, error) {
	c, err := vm.main.Flush(ctx)
	return c, err
}
// Passing this as a pointer of structs has proven to be an enormous PiTA;
// hence these hand-written types and their CBOR marshallers.

// xRedirect is one actor code redirect: the code CID in from is mapped to the
// code CID in to.
type xRedirect struct {
	from cid.Cid
	to   cid.Cid
}
// xMapping is the list of actor code redirects that is CBOR-encoded and put
// into the store for the debug FVM.
type xMapping struct {
	redirects []xRedirect
}
// MarshalCBOR writes the mapping to w as a CBOR array with one element per
// redirect, each encoded by xRedirect.MarshalCBOR in slice order. The first
// write error encountered is returned as-is.
func (m *xMapping) MarshalCBOR(w io.Writer) error {
	header := make([]byte, 9)
	count := uint64(len(m.redirects))

	err := cbg.WriteMajorTypeHeaderBuf(header, w, cbg.MajArray, count)
	if err != nil {
		return err
	}

	for i := range m.redirects {
		// Copy the element so marshalling works on a value, as a range-value
		// loop would.
		entry := m.redirects[i]
		if err := entry.MarshalCBOR(w); err != nil {
			return err
		}
	}

	return nil
}
// MarshalCBOR writes the redirect to w as a two-element CBOR array holding
// the from CID followed by the to CID. A failure to write the array header is
// returned as-is; a failure to write either CID is wrapped with the name of
// the field that could not be written.
func (r *xRedirect) MarshalCBOR(w io.Writer) error {
	// One scratch buffer is shared by the header and both CID writes.
	scratch := make([]byte, 9)

	if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajArray, uint64(2)); err != nil {
		return err
	}

	if err := cbg.WriteCidBuf(scratch, w, r.from); err != nil {
		return xerrors.Errorf("failed to write cid field from: %w", err)
	}

	if err := cbg.WriteCidBuf(scratch, w, r.to); err != nil {
		// Name the field that actually failed; this message previously said
		// "from" for both fields.
		return xerrors.Errorf("failed to write cid field to: %w", err)
	}

	return nil
}

View File

@ -4,7 +4,7 @@ import (
"context"
"os"
"github.com/ipfs/go-cid"
cid "github.com/ipfs/go-cid"
"github.com/filecoin-project/go-state-types/network"
@ -23,13 +23,25 @@ type Interface interface {
var useFvmForMainnetV15 = os.Getenv("LOTUS_USE_FVM_TO_SYNC_MAINNET_V15") == "1"
// WARNING: You will not affect your node's execution by misusing this feature, but you will confuse yourself thoroughly!
// An envvar that allows the user to specify debug actors bundles to be used by the FVM
// alongside regular execution. This is basically only to be used to print out specific logging information.
// Message failures, unexpected terminations, gas costs, etc. should all be ignored.
var useFvmDebug = os.Getenv("LOTUS_FVM_DEVELOPER_DEBUG") == "1"
func NewVM(ctx context.Context, opts *VMOpts) (Interface, error) {
if opts.NetworkVersion >= network.Version16 {
if useFvmDebug {
return NewDualExecutionFVM(ctx, opts)
}
return NewFVM(ctx, opts)
}
// Remove after v16 upgrade, this is only to support testing and validation of the FVM
if useFvmForMainnetV15 && opts.NetworkVersion >= network.Version15 {
if useFvmDebug {
return NewDualExecutionFVM(ctx, opts)
}
return NewFVM(ctx, opts)
}

2
extern/filecoin-ffi vendored

@ -1 +1 @@
Subproject commit 943e33574dcacd940edff0cf414c82e656bdaeb3
Subproject commit e87bffeaf690c18d656d636bdab46f0ffa90593b

View File

@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/filecoin-project/go-address"
system8 "github.com/filecoin-project/go-state-types/builtin/v8/system"
"github.com/filecoin-project/go-state-types/manifest"
"github.com/filecoin-project/go-state-types/network"
gstStore "github.com/filecoin-project/go-state-types/store"
@ -18,6 +19,7 @@ import (
"github.com/filecoin-project/lotus/blockstore"
"github.com/filecoin-project/lotus/chain/actors"
"github.com/filecoin-project/lotus/chain/actors/builtin/system"
"github.com/filecoin-project/lotus/chain/consensus/filcns"
"github.com/filecoin-project/lotus/chain/state"
"github.com/filecoin-project/lotus/chain/stmgr"
@ -47,18 +49,15 @@ func TestLiteMigration(t *testing.T) {
oldStateTree, err := state.LoadStateTree(ctxStore, stateRoot)
require.NoError(t, err)
oldManifest, err := stmgr.GetManifest(ctx, oldStateTree)
oldManifestData, err := stmgr.GetManifestData(ctx, oldStateTree)
require.NoError(t, err)
newManifestCid := makeTestManifest(t, ctxStore)
// Use the Cid we generated to get the new manifest instead of loading it from the state tree, because that would not test that we have the correct manifest in the state
// Use the Cid we generated to get the new manifest instead of loading it from the store, so as to confirm it's in the store
var newManifest manifest.Manifest
err = ctxStore.Get(ctx, newManifestCid, &newManifest)
require.NoError(t, err)
err = newManifest.Load(ctx, ctxStore)
require.NoError(t, err)
newManifestData := manifest.ManifestData{}
err = ctxStore.Get(ctx, newManifest.Data, &newManifestData)
require.NoError(t, err)
require.NoError(t, ctxStore.Get(ctx, newManifestCid, &newManifest), "error getting new manifest")
// populate the entries field of the manifest
require.NoError(t, newManifest.Load(ctx, ctxStore), "error loading new manifest")
newStateRoot, err := filcns.LiteMigration(ctx, bs, newManifestCid, stateRoot, actors.Version8, types.StateTreeVersion4, types.StateTreeVersion4)
require.NoError(t, err)
@ -67,10 +66,10 @@ func TestLiteMigration(t *testing.T) {
require.NoError(t, err)
migrations := make(map[cid.Cid]cid.Cid)
for _, entry := range newManifestData.Entries {
oldCodeCid, ok := oldManifest.Get(entry.Name)
for _, entry := range oldManifestData.Entries {
newCodeCid, ok := newManifest.Get(entry.Name)
require.True(t, ok)
migrations[oldCodeCid] = entry.Code
migrations[entry.Code] = newCodeCid
}
err = newStateTree.ForEach(func(addr address.Address, newActorState *types.Actor) error {
@ -79,6 +78,13 @@ func TestLiteMigration(t *testing.T) {
newCodeCid, ok := migrations[oldActor.Code]
require.True(t, ok)
require.Equal(t, newCodeCid, newActorState.Code)
if addr == system.Address {
var systemSt system8.State
require.NoError(t, ctxStore.Get(ctx, newActorState.Head, &systemSt))
require.Equal(t, systemSt.BuiltinActors, newManifest.Data)
}
return nil
})
require.NoError(t, err)