ipld-eth-state-snapshot/pkg/snapshot/pg/publisher.go

258 lines
7.0 KiB
Go
Raw Normal View History

2020-07-01 18:44:59 +00:00
// Copyright © 2020 Vulcanize, Inc
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package pg
2020-07-01 18:44:59 +00:00
import (
"context"
"math/big"
2023-04-11 15:21:43 +00:00
"strconv"
2021-12-23 07:52:44 +00:00
"sync/atomic"
"time"
2020-07-01 23:07:56 +00:00
"github.com/ipfs/go-cid"
2023-04-11 15:21:43 +00:00
"github.com/lib/pq"
2020-07-01 18:44:59 +00:00
"github.com/multiformats/go-multihash"
"github.com/sirupsen/logrus"
2022-03-09 13:37:33 +00:00
log "github.com/sirupsen/logrus"
2020-07-01 18:44:59 +00:00
2023-04-11 15:03:37 +00:00
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/statediff/indexer/database/sql"
"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"
"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
2023-04-11 15:21:43 +00:00
"github.com/ethereum/go-ethereum/statediff/indexer/models"
2023-04-11 15:03:37 +00:00
"github.com/ethereum/go-ethereum/statediff/indexer/shared/schema"
"github.com/cerc-io/ipld-eth-state-snapshot/pkg/prom"
snapt "github.com/cerc-io/ipld-eth-state-snapshot/pkg/types"
2020-07-01 18:44:59 +00:00
)
// Compile-time assertion that *publisher satisfies the snapt.Publisher interface.
var _ snapt.Publisher = (*publisher)(nil)
2021-12-23 14:34:34 +00:00
// logInterval is the period of the ticker that drives the periodic
// "progress" log lines emitted by logNodeCounters.
const logInterval = time.Minute
2021-12-23 07:52:44 +00:00
2021-12-14 06:50:19 +00:00
// Publisher is wrapper around DB.
type publisher struct {
db *postgres.DB
currBatchSize uint
2021-12-23 07:52:44 +00:00
stateNodeCounter uint64
storageNodeCounter uint64
codeNodeCounter uint64
startTime time.Time
2020-07-01 18:44:59 +00:00
}
2021-12-14 06:50:19 +00:00
// NewPublisher creates Publisher
func NewPublisher(db *postgres.DB) *publisher {
return &publisher{
db: db,
startTime: time.Now(),
2020-07-01 18:44:59 +00:00
}
}
// pubTx wraps a sql.Tx so it can be handed out as a snapt.Tx (see BeginTx).
type pubTx struct {
// Embedded database transaction that Rollback, Commit and Exec delegate to.
sql.Tx
// callback, when non-nil, is run by Commit after the commit attempt.
callback func()
}
// Rollback rolls back the wrapped transaction using a background context.
func (tx pubTx) Rollback() error {
	ctx := context.Background()
	return tx.Tx.Rollback(ctx)
}
// Commit commits the wrapped transaction using a background context and
// returns the commit error. When a callback is set it is deferred, so it runs
// after the commit attempt regardless of whether the commit succeeded.
func (tx pubTx) Commit() error {
	if done := tx.callback; done != nil {
		defer done()
	}
	ctx := context.Background()
	return tx.Tx.Commit(ctx)
}
// Exec runs the statement with the given arguments on the wrapped transaction
// using a background context.
func (tx pubTx) Exec(sql string, args ...interface{}) (sql.Result, error) {
	ctx := context.Background()
	return tx.Tx.Exec(ctx, sql, args...)
}
func (p *publisher) BeginTx() (snapt.Tx, error) {
tx, err := p.db.Begin(context.Background())
if err != nil {
return nil, err
}
go p.logNodeCounters()
return pubTx{tx, func() {
2022-03-09 13:37:33 +00:00
p.printNodeCounters("final stats")
}}, nil
}
// PublishRaw derives a cid from raw bytes and provided codec and multihash type, and writes it to the db tx
2023-04-11 15:21:43 +00:00
// returns the CID
func (tx pubTx) publishRaw(codec uint64, raw []byte, height *big.Int) (cid string, err error) {
c, err := ipld.RawdataToCid(codec, raw, multihash.KECCAK_256)
if err != nil {
return
}
cid = c.String()
2023-04-11 15:21:43 +00:00
return tx.publishIPLD(c, raw, height)
}
func (tx pubTx) publishIPLD(c cid.Cid, raw []byte, height *big.Int) (string, error) {
2023-04-11 15:03:37 +00:00
_, err := tx.Exec(schema.TableIPLDBlock.ToInsertStatement(false), height.Uint64(), c.String(), raw)
return c.String(), err
}
// PublishIPLD writes an IPLD to the ipld.blocks blockstore inside snapTx and
// returns the CID string. snapTx must hold a pubTx (as returned by BeginTx or
// PrepareTxForBatch); any other dynamic type makes the type assertion panic.
func (p *publisher) PublishIPLD(c cid.Cid, raw []byte, height *big.Int, snapTx snapt.Tx) (string, error) {
	return snapTx.(pubTx).publishIPLD(c, raw, height)
}
// PublishHeader writes the header to the ipfs backing pg datastore and adds secondary indexes in the header_cids table
func (p *publisher) PublishHeader(header *types.Header) (err error) {
2020-07-01 18:44:59 +00:00
headerNode, err := ipld.NewEthHeader(header)
if err != nil {
return err
2020-07-01 18:44:59 +00:00
}
2022-02-16 11:27:02 +00:00
snapTx, err := p.db.Begin(context.Background())
2020-07-01 18:44:59 +00:00
if err != nil {
return err
2020-07-01 18:44:59 +00:00
}
2022-02-16 11:27:02 +00:00
tx := pubTx{snapTx, nil}
defer func() {
err = snapt.CommitOrRollback(tx, err)
if err != nil {
logrus.Errorf("CommitOrRollback failed: %s", err)
}
}()
if _, err = tx.publishIPLD(headerNode.Cid(), headerNode.RawData(), header.Number); err != nil {
return err
2020-07-01 18:44:59 +00:00
}
2023-04-11 15:21:43 +00:00
_, err = tx.Exec(schema.TableHeader.ToInsertStatement(false),
header.Number.Uint64(),
header.Hash().Hex(),
header.ParentHash.Hex(),
headerNode.Cid().String(),
"0",
pq.StringArray([]string{p.db.NodeID()}),
"0",
header.Root.Hex(),
header.TxHash.Hex(),
header.ReceiptHash.Hex(),
header.UncleHash.Hex(),
header.Bloom.Bytes(),
strconv.FormatUint(header.Time, 10),
header.Coinbase.String())
return err
2020-07-01 18:44:59 +00:00
}
2023-04-11 15:03:37 +00:00
// PublishStateLeafNode writes the state leaf node to eth.state_cids
func (p *publisher) PublishStateLeafNode(stateNode *models.StateNodeModel, snapTx snapt.Tx) error {
2022-02-16 11:27:02 +00:00
tx := snapTx.(pubTx)
2023-04-11 15:03:37 +00:00
_, err := tx.Exec(schema.TableStateNode.ToInsertStatement(false),
stateNode.BlockNumber,
stateNode.HeaderID,
stateNode.StateKey,
stateNode.CID,
2023-04-12 18:07:42 +00:00
false,
2023-04-11 15:03:37 +00:00
stateNode.Balance,
stateNode.Nonce,
stateNode.CodeHash,
stateNode.StorageRoot,
2023-04-11 15:21:43 +00:00
false)
if err != nil {
return err
}
2021-12-23 07:52:44 +00:00
// increment state node counter.
atomic.AddUint64(&p.stateNodeCounter, 1)
prom.IncStateNodeCount()
2021-12-23 07:52:44 +00:00
// increment current batch size counter
p.currBatchSize += 2
return err
2020-07-01 18:44:59 +00:00
}
2023-04-11 15:03:37 +00:00
// PublishStorageLeafNode writes the storage leaf node to eth.storage_cids
func (p *publisher) PublishStorageLeafNode(storageNode *models.StorageNodeModel, snapTx snapt.Tx) error {
2022-02-16 11:27:02 +00:00
tx := snapTx.(pubTx)
2023-04-11 15:03:37 +00:00
_, err := tx.Exec(schema.TableStorageNode.ToInsertStatement(false),
storageNode.BlockNumber,
storageNode.HeaderID,
storageNode.StateKey,
storageNode.StorageKey,
storageNode.CID,
2023-04-12 18:07:42 +00:00
false,
2023-04-11 15:03:37 +00:00
storageNode.Value,
2023-04-11 15:21:43 +00:00
false)
if err != nil {
return err
}
2021-12-23 07:52:44 +00:00
// increment storage node counter.
atomic.AddUint64(&p.storageNodeCounter, 1)
prom.IncStorageNodeCount()
2021-12-23 07:52:44 +00:00
// increment current batch size counter
p.currBatchSize += 2
return err
2020-07-01 18:44:59 +00:00
}
// PublishCode writes code to the ipfs backing pg datastore
func (p *publisher) PublishCode(height *big.Int, codeHash common.Hash, codeBytes []byte, snapTx snapt.Tx) error {
2023-04-11 15:03:37 +00:00
c := ipld.Keccak256ToCid(ipld.RawBinary, codeHash.Bytes())
2022-02-16 11:27:02 +00:00
tx := snapTx.(pubTx)
2023-04-11 15:03:37 +00:00
if _, err := tx.publishIPLD(c, codeBytes, height); err != nil {
return err
}
// increment code node counter.
atomic.AddUint64(&p.codeNodeCounter, 1)
prom.IncCodeNodeCount()
2021-12-23 07:52:44 +00:00
p.currBatchSize++
return nil
}
func (p *publisher) PrepareTxForBatch(tx snapt.Tx, maxBatchSize uint) (snapt.Tx, error) {
var err error
// maximum batch size reached, commit the current transaction and begin a new transaction.
if maxBatchSize <= p.currBatchSize {
if err = tx.Commit(); err != nil {
return nil, err
}
2022-02-16 11:27:02 +00:00
snapTx, err := p.db.Begin(context.Background())
tx = pubTx{Tx: snapTx}
if err != nil {
return nil, err
}
p.currBatchSize = 0
}
return tx, nil
}
2021-12-23 07:52:44 +00:00
// logNodeCounters periodically logs the number of node processed.
func (p *publisher) logNodeCounters() {
2021-12-23 14:34:34 +00:00
t := time.NewTicker(logInterval)
2021-12-23 07:52:44 +00:00
for range t.C {
2022-03-09 13:37:33 +00:00
p.printNodeCounters("progress")
2021-12-23 07:52:44 +00:00
}
}
2022-03-09 13:37:33 +00:00
// printNodeCounters logs msg at info level, annotated with the time elapsed
// since the publisher was created and the current state, storage and code
// node counts (read atomically).
func (p *publisher) printNodeCounters(msg string) {
	fields := log.Fields{
		"runtime":       time.Since(p.startTime).String(),
		"state nodes":   atomic.LoadUint64(&p.stateNodeCounter),
		"storage nodes": atomic.LoadUint64(&p.storageNodeCounter),
		"code nodes":    atomic.LoadUint64(&p.codeNodeCounter),
	}
	log.WithFields(fields).Info(msg)
}