2022-02-09 15:19:10 +00:00
|
|
|
// Copyright © 2020 Vulcanize, Inc
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
package publisher
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/csv"
|
|
|
|
"fmt"
|
2022-05-13 08:30:40 +00:00
|
|
|
"math/big"
|
2022-02-09 15:19:10 +00:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2023-04-11 15:21:43 +00:00
|
|
|
"strconv"
|
2022-02-09 15:19:10 +00:00
|
|
|
"sync/atomic"
|
|
|
|
"time"
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
"github.com/lib/pq"
|
|
|
|
|
2022-02-09 15:19:10 +00:00
|
|
|
"github.com/ipfs/go-cid"
|
|
|
|
"github.com/multiformats/go-multihash"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
|
|
"github.com/ethereum/go-ethereum/core/types"
|
2022-02-09 15:19:10 +00:00
|
|
|
"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
|
2023-04-11 15:21:43 +00:00
|
|
|
"github.com/ethereum/go-ethereum/statediff/indexer/models"
|
2022-02-09 15:19:10 +00:00
|
|
|
nodeinfo "github.com/ethereum/go-ethereum/statediff/indexer/node"
|
2023-04-11 15:21:43 +00:00
|
|
|
"github.com/ethereum/go-ethereum/statediff/indexer/shared/schema"
|
|
|
|
|
|
|
|
"github.com/cerc-io/ipld-eth-state-snapshot/pkg/prom"
|
|
|
|
snapt "github.com/cerc-io/ipld-eth-state-snapshot/pkg/types"
|
2022-02-09 15:19:10 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Compile-time check that *publisher satisfies the snapt.Publisher interface.
var _ snapt.Publisher = (*publisher)(nil)
|
|
|
|
|
|
|
|
var (
|
|
|
|
// tables written once per block
|
2023-04-11 15:21:43 +00:00
|
|
|
perBlockTables = []*schema.Table{
|
|
|
|
&schema.TableIPLDBlock,
|
|
|
|
&schema.TableNodeInfo,
|
|
|
|
&schema.TableHeader,
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
// tables written during state iteration
|
2023-04-11 15:21:43 +00:00
|
|
|
perNodeTables = []*schema.Table{
|
|
|
|
&schema.TableIPLDBlock,
|
|
|
|
&schema.TableStateNode,
|
|
|
|
&schema.TableStorageNode,
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
// logInterval is the period between two progress log lines.
const logInterval = time.Minute
|
|
|
|
|
|
|
|
type publisher struct {
|
|
|
|
dir string // dir containing output files
|
|
|
|
writers fileWriters
|
|
|
|
|
|
|
|
nodeInfo nodeinfo.Info
|
|
|
|
|
|
|
|
startTime time.Time
|
|
|
|
currBatchSize uint
|
|
|
|
stateNodeCounter uint64
|
|
|
|
storageNodeCounter uint64
|
|
|
|
codeNodeCounter uint64
|
|
|
|
txCounter uint32
|
|
|
|
}
|
|
|
|
|
|
|
|
// fileWriter is a CSV writer together with the file it writes to. The file
// handle is kept next to the csv.Writer so that it can be closed once the
// writer is no longer needed; without it the descriptor would stay open for
// the lifetime of the process.
type fileWriter struct {
	*csv.Writer
	file *os.File
}

// fileWriters wraps the file writers for each output table
type fileWriters map[string]fileWriter

// fileTx is the transaction type handed out by BeginTx. It owns its writers:
// committing or rolling back the transaction closes the underlying files.
type fileTx struct{ fileWriters }

// Commit flushes the buffered rows of every writer to its file. All writers
// are flushed even when one of them fails; the first error seen is returned.
// The files are left open so the writers can keep being used.
func (tx fileWriters) Commit() error {
	var firstErr error
	for _, w := range tx {
		w.Flush()
		if err := w.Error(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

// Rollback does nothing; rows that were already flushed stay in the files.
func (fileWriters) Rollback() error { return nil } // TODO: delete the file?

// close closes the file behind every writer and returns the first error seen.
// Writers whose file was already closed are skipped, so calling close more
// than once is harmless.
func (tx fileWriters) close() error {
	var firstErr error
	for name, w := range tx {
		if w.file == nil {
			continue
		}
		if err := w.file.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
		// Mark the handle as released. Updating an existing key while ranging
		// over the map is allowed.
		w.file = nil
		tx[name] = w
	}
	return firstErr
}

// Commit flushes all buffered rows and then closes the transaction's files.
// The files are closed even when flushing fails; a flush error takes
// precedence over a close error. The transaction must not be written to
// afterwards.
func (tx fileTx) Commit() error {
	flushErr := tx.fileWriters.Commit()
	closeErr := tx.close()
	if flushErr != nil {
		return flushErr
	}
	return closeErr
}

// Rollback closes the transaction's files without flushing buffered rows.
// Rows already written to disk are not removed.
func (tx fileTx) Rollback() error {
	return tx.close()
}

// newFileWriter opens path for appending (creating it with mode 0644 if it
// does not exist) and returns a CSV writer on top of it.
func newFileWriter(path string) (fileWriter, error) {
	file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		return fileWriter{}, err
	}
	return fileWriter{Writer: csv.NewWriter(file), file: file}, nil
}
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
func (tx fileWriters) write(tbl *schema.Table, args ...interface{}) error {
|
2022-02-09 15:19:10 +00:00
|
|
|
row := tbl.ToCsvRow(args...)
|
2022-02-17 06:34:54 +00:00
|
|
|
return tx[tbl.Name].Write(row)
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
func makeFileWriters(dir string, tables []*schema.Table) (fileWriters, error) {
|
2022-02-09 15:19:10 +00:00
|
|
|
if err := os.MkdirAll(dir, 0755); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-02-17 06:34:54 +00:00
|
|
|
writers := fileWriters{}
|
2022-02-09 15:19:10 +00:00
|
|
|
for _, tbl := range tables {
|
|
|
|
w, err := newFileWriter(TableFile(dir, tbl.Name))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-02-17 06:34:54 +00:00
|
|
|
writers[tbl.Name] = w
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
return writers, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewPublisher creates a publisher which writes to per-table CSV files which can be imported
|
|
|
|
// with the Postgres COPY command.
|
|
|
|
// The output directory will be created if it does not exist.
|
|
|
|
func NewPublisher(path string, node nodeinfo.Info) (*publisher, error) {
|
2022-03-30 23:57:30 +00:00
|
|
|
if err := os.MkdirAll(path, 0777); err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to make MkdirAll for path: %s err: %s", path, err)
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
writers, err := makeFileWriters(path, perBlockTables)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
pub := &publisher{
|
|
|
|
writers: writers,
|
|
|
|
dir: path,
|
|
|
|
nodeInfo: node,
|
|
|
|
startTime: time.Now(),
|
|
|
|
}
|
|
|
|
go pub.logNodeCounters()
|
|
|
|
return pub, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// TableFile returns the path of the CSV file for the table called name
// inside dir, i.e. dir joined with name plus the ".csv" extension.
func TableFile(dir, name string) string {
	fileName := name + ".csv"
	return filepath.Join(dir, fileName)
}
|
|
|
|
|
|
|
|
func (p *publisher) txDir(index uint32) string {
|
|
|
|
return filepath.Join(p.dir, fmt.Sprintf("%010d", index))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *publisher) BeginTx() (snapt.Tx, error) {
|
2022-03-14 15:24:35 +00:00
|
|
|
index := atomic.AddUint32(&p.txCounter, 1) - 1
|
2022-02-09 15:19:10 +00:00
|
|
|
dir := p.txDir(index)
|
|
|
|
writers, err := makeFileWriters(dir, perNodeTables)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return fileTx{writers}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// PublishRaw derives a cid from raw bytes and provided codec and multihash type, and writes it to the db tx
|
2023-04-11 15:21:43 +00:00
|
|
|
// returns the CID
|
|
|
|
func (tx fileWriters) publishRaw(codec uint64, raw []byte, height *big.Int) (cid string, err error) {
|
2022-02-09 15:19:10 +00:00
|
|
|
c, err := ipld.RawdataToCid(codec, raw, multihash.KECCAK_256)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
cid = c.String()
|
2023-04-11 15:21:43 +00:00
|
|
|
return tx.publishIPLD(c, raw, height)
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
|
2022-05-13 08:30:40 +00:00
|
|
|
func (tx fileWriters) publishIPLD(c cid.Cid, raw []byte, height *big.Int) (string, error) {
|
2023-04-11 15:21:43 +00:00
|
|
|
return c.String(), tx.write(&schema.TableIPLDBlock, height.String(), c.String(), raw)
|
|
|
|
}
|
|
|
|
|
|
|
|
// PublishIPLD writes an IPLD to the ipld.blocks blockstore
|
|
|
|
func (p *publisher) PublishIPLD(c cid.Cid, raw []byte, height *big.Int, snapTx snapt.Tx) (string, error) {
|
|
|
|
tx := snapTx.(fileTx)
|
|
|
|
return tx.publishIPLD(c, raw, height)
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// PublishHeader writes the header to the ipfs backing pg datastore and adds secondary
|
|
|
|
// indexes in the header_cids table
|
|
|
|
func (p *publisher) PublishHeader(header *types.Header) error {
|
|
|
|
headerNode, err := ipld.NewEthHeader(header)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-05-13 08:30:40 +00:00
|
|
|
if _, err = p.writers.publishIPLD(headerNode.Cid(), headerNode.RawData(), header.Number); err != nil {
|
2022-02-09 15:19:10 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
err = p.writers.write(&schema.TableNodeInfo, p.nodeInfo.GenesisBlock, p.nodeInfo.NetworkID, p.nodeInfo.ID,
|
2022-02-09 15:19:10 +00:00
|
|
|
p.nodeInfo.ClientName, p.nodeInfo.ChainID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-04-11 15:21:43 +00:00
|
|
|
err = p.writers.write(&schema.TableHeader,
|
|
|
|
header.Number.String(),
|
|
|
|
header.Hash().Hex(),
|
|
|
|
header.ParentHash.Hex(),
|
|
|
|
headerNode.Cid().String(),
|
|
|
|
0,
|
|
|
|
pq.StringArray([]string{p.nodeInfo.ID}),
|
|
|
|
0,
|
|
|
|
header.Root.Hex(),
|
|
|
|
header.TxHash.Hex(),
|
|
|
|
header.ReceiptHash.Hex(),
|
|
|
|
header.UncleHash.Hex(),
|
|
|
|
header.Bloom.Bytes(),
|
|
|
|
strconv.FormatUint(header.Time, 10),
|
|
|
|
header.Coinbase.String())
|
2022-02-09 15:19:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return p.writers.Commit()
|
|
|
|
}
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
// PublishStateLeafNode writes the state node eth.state_cids
|
|
|
|
func (p *publisher) PublishStateLeafNode(stateNode *models.StateNodeModel, snapTx snapt.Tx) error {
|
2022-02-16 11:27:02 +00:00
|
|
|
tx := snapTx.(fileTx)
|
2022-02-09 15:19:10 +00:00
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
err := tx.write(&schema.TableStateNode,
|
|
|
|
stateNode.BlockNumber,
|
|
|
|
stateNode.HeaderID,
|
|
|
|
stateNode.StateKey,
|
|
|
|
stateNode.CID,
|
|
|
|
true,
|
|
|
|
stateNode.Balance,
|
|
|
|
strconv.FormatUint(stateNode.Nonce, 10),
|
|
|
|
stateNode.CodeHash,
|
|
|
|
stateNode.StorageRoot,
|
|
|
|
false)
|
2022-02-09 15:19:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// increment state node counter.
|
|
|
|
atomic.AddUint64(&p.stateNodeCounter, 1)
|
2022-05-23 11:26:48 +00:00
|
|
|
prom.IncStateNodeCount()
|
|
|
|
|
2022-02-09 15:19:10 +00:00
|
|
|
// increment current batch size counter
|
|
|
|
p.currBatchSize += 2
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
// PublishStorageLeafNode writes the storage node to eth.storage_cids
|
|
|
|
func (p *publisher) PublishStorageLeafNode(storageNode *models.StorageNodeModel, snapTx snapt.Tx) error {
|
2022-02-16 11:27:02 +00:00
|
|
|
tx := snapTx.(fileTx)
|
2022-02-09 15:19:10 +00:00
|
|
|
|
2023-04-11 15:21:43 +00:00
|
|
|
err := tx.write(&schema.TableStorageNode,
|
|
|
|
storageNode.BlockNumber,
|
|
|
|
storageNode.HeaderID,
|
|
|
|
storageNode.StateKey,
|
|
|
|
storageNode.StorageKey,
|
|
|
|
storageNode.CID,
|
|
|
|
true,
|
|
|
|
storageNode.Value,
|
|
|
|
false)
|
2022-02-09 15:19:10 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// increment storage node counter.
|
|
|
|
atomic.AddUint64(&p.storageNodeCounter, 1)
|
2022-05-23 11:26:48 +00:00
|
|
|
prom.IncStorageNodeCount()
|
|
|
|
|
2022-02-09 15:19:10 +00:00
|
|
|
// increment current batch size counter
|
|
|
|
p.currBatchSize += 2
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// PublishCode writes code to the ipfs backing pg datastore
|
2022-05-13 08:30:40 +00:00
|
|
|
func (p *publisher) PublishCode(height *big.Int, codeHash common.Hash, codeBytes []byte, snapTx snapt.Tx) error {
|
2023-04-11 15:21:43 +00:00
|
|
|
c := ipld.Keccak256ToCid(ipld.RawBinary, codeHash.Bytes())
|
2022-02-09 15:19:10 +00:00
|
|
|
|
2022-02-16 11:27:02 +00:00
|
|
|
tx := snapTx.(fileTx)
|
2023-04-11 15:21:43 +00:00
|
|
|
if _, err := tx.publishIPLD(c, codeBytes, height); err != nil {
|
|
|
|
return err
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
// increment code node counter.
|
|
|
|
atomic.AddUint64(&p.codeNodeCounter, 1)
|
2022-05-23 11:26:48 +00:00
|
|
|
prom.IncCodeNodeCount()
|
|
|
|
|
2022-02-09 15:19:10 +00:00
|
|
|
p.currBatchSize++
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *publisher) PrepareTxForBatch(tx snapt.Tx, maxBatchSize uint) (snapt.Tx, error) {
|
|
|
|
return tx, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// logNodeCounters periodically logs the number of node processed.
|
|
|
|
func (p *publisher) logNodeCounters() {
|
|
|
|
t := time.NewTicker(logInterval)
|
|
|
|
for range t.C {
|
2022-03-09 13:37:33 +00:00
|
|
|
p.printNodeCounters("progress")
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-09 13:37:33 +00:00
|
|
|
func (p *publisher) printNodeCounters(msg string) {
|
2022-03-30 19:09:32 +00:00
|
|
|
logrus.WithFields(logrus.Fields{
|
2022-03-09 13:37:33 +00:00
|
|
|
"runtime": time.Now().Sub(p.startTime).String(),
|
|
|
|
"state nodes": atomic.LoadUint64(&p.stateNodeCounter),
|
|
|
|
"storage nodes": atomic.LoadUint64(&p.storageNodeCounter),
|
|
|
|
"code nodes": atomic.LoadUint64(&p.codeNodeCounter),
|
|
|
|
}).Info(msg)
|
2022-02-09 15:19:10 +00:00
|
|
|
}
|