backfill based on validation level

Ian Norden 2020-04-03 20:36:37 -05:00
parent 649fd54a9f
commit 8960cde4f7
14 changed files with 1859 additions and 38 deletions

View File

@@ -67,6 +67,7 @@ func init() {
resyncCmd.PersistentFlags().Int("resync-batch-size", 0, "data fetching batch size")
resyncCmd.PersistentFlags().Int("resync-batch-number", 0, "how many goroutines to fetch data concurrently")
resyncCmd.PersistentFlags().Bool("resync-clear-old-cache", false, "if true, clear out old data of the provided type within the resync range before resyncing")
resyncCmd.PersistentFlags().Bool("resync-reset-validation", false, "if true, reset times_validated to 0")
resyncCmd.PersistentFlags().String("btc-http-path", "", "http url for bitcoin node")
resyncCmd.PersistentFlags().String("btc-password", "", "password for btc node")
@@ -88,6 +89,7 @@ func init() {
viper.BindPFlag("resync.batchSize", resyncCmd.PersistentFlags().Lookup("resync-batch-size"))
viper.BindPFlag("resync.batchNumber", resyncCmd.PersistentFlags().Lookup("resync-batch-number"))
viper.BindPFlag("resync.clearOldCache", resyncCmd.PersistentFlags().Lookup("resync-clear-old-cache"))
viper.BindPFlag("resync.resetValidation", resyncCmd.PersistentFlags().Lookup("resync-reset-validation"))
viper.BindPFlag("bitcoin.httpPath", resyncCmd.PersistentFlags().Lookup("btc-http-path"))
viper.BindPFlag("bitcoin.pass", resyncCmd.PersistentFlags().Lookup("btc-password"))

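The new resync-reset-validation flag only toggles a boolean in the resync config; the code that acts on it lives in a diff not shown above. As a rough sketch of the intended effect, assuming resetting simply zeroes times_validated across the resync range (the eth.header_cids table and times_validated column come from the retriever changes later in this commit; the resetValidationLevels helper and exact statement are hypothetical):

package resync

import (
	"github.com/jmoiron/sqlx"
	"github.com/spf13/viper"
)

// resetValidationLevels is a hypothetical illustration: when resync.resetValidation
// is set, zero out times_validated for the resync range so those headers fall
// below any validation level and are revalidated by the backfill process
func resetValidationLevels(db *sqlx.DB, start, stop uint64) error {
	if !viper.GetBool("resync.resetValidation") {
		return nil
	}
	_, err := db.Exec(`UPDATE eth.header_cids
		SET times_validated = 0
		WHERE block_number BETWEEN $1 AND $2`, start, stop)
	return err
}

With resetValidation = true, every header in the range drops back below the configured validation level and is picked up again by the gap-checking backfill introduced in the changes below.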
View File

@@ -117,6 +117,7 @@ func init() {
superNodeCmd.PersistentFlags().Int("supernode-frequency", 0, "how often (in seconds) the backfill process checks for gaps")
superNodeCmd.PersistentFlags().Int("supernode-batch-size", 0, "data fetching batch size")
superNodeCmd.PersistentFlags().Int("supernode-batch-number", 0, "how many goroutines to fetch data concurrently")
superNodeCmd.PersistentFlags().Int("supernode-validation-level", 0, "backfill will resync any data below this level")
superNodeCmd.PersistentFlags().String("btc-ws-path", "", "ws url for bitcoin node")
superNodeCmd.PersistentFlags().String("btc-http-path", "", "http url for bitcoin node")
@@ -144,6 +145,7 @@ func init() {
viper.BindPFlag("superNode.frequency", superNodeCmd.PersistentFlags().Lookup("supernode-frequency"))
viper.BindPFlag("superNode.batchSize", superNodeCmd.PersistentFlags().Lookup("supernode-batch-size"))
viper.BindPFlag("superNode.batchNumber", superNodeCmd.PersistentFlags().Lookup("supernode-batch-number"))
viper.BindPFlag("superNode.validationLevel", superNodeCmd.PersistentFlags().Lookup("supernode-validation-level"))
viper.BindPFlag("bitcoin.wsPath", superNodeCmd.PersistentFlags().Lookup("btc-ws-path"))
viper.BindPFlag("bitcoin.httpPath", superNodeCmd.PersistentFlags().Lookup("btc-http-path"))

View File

@@ -5,6 +5,9 @@ ADD COLUMN log_contracts VARCHAR(66)[];
ALTER TABLE eth.receipt_cids
RENAME COLUMN contract TO contract_hash;
WITH uniques AS (SELECT DISTINCT ON (tx_id) * FROM eth.receipt_cids)
DELETE FROM eth.receipt_cids WHERE receipt_cids.id NOT IN (SELECT id FROM uniques);
ALTER TABLE eth.receipt_cids
ADD CONSTRAINT receipt_cids_tx_id_key UNIQUE (tx_id);

File diff suppressed because it is too large

View File

@@ -16,7 +16,7 @@
batchSize = 1 # $RESYNC_BATCH_SIZE
batchNumber = 50 # $RESYNC_BATCH_NUMBER
clearOldCache = false # $RESYNC_CLEAR_OLD_CACHE
resetValidation = false # $RESYNC_RESET_VALIDATION
resetValidation = true # $RESYNC_RESET_VALIDATION
[superNode]
chain = "bitcoin" # $SUPERNODE_CHAIN
@@ -30,6 +30,7 @@
frequency = 45 # $SUPERNODE_FREQUENCY
batchSize = 1 # $SUPERNODE_BATCH_SIZE
batchNumber = 50 # $SUPERNODE_BATCH_NUMBER
validationLevel = 1 # $SUPERNODE_VALIDATION_LEVEL
[bitcoin]
wsPath = "127.0.0.1:8332" # $BTC_WS_PATH

View File

@@ -15,7 +15,7 @@
stop = 0 # $RESYNC_STOP
batchSize = 5 # $RESYNC_BATCH_SIZE
batchNumber = 50 # $RESYNC_BATCH_NUMBER
clearOldCache = true # $RESYNC_CLEAR_OLD_CACHE
clearOldCache = false # $RESYNC_CLEAR_OLD_CACHE
resetValidation = true # $RESYNC_RESET_VALIDATION
[superNode]
@@ -30,6 +30,7 @@
frequency = 15 # $SUPERNODE_FREQUENCY
batchSize = 5 # $SUPERNODE_BATCH_SIZE
batchNumber = 50 # $SUPERNODE_BATCH_NUMBER
validationLevel = 1 # $SUPERNODE_VALIDATION_LEVEL
[ethereum]
wsPath = "127.0.0.1:8546" # $ETH_WS_PATH

View File

@@ -44,6 +44,14 @@ var _ = Describe("GetBlockHeightBins", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(blockRangeBins)).To(Equal(100))
Expect(blockRangeBins[99]).To(Equal(lastBin))
startingBlock = 1
endingBlock = 1
batchSize = 100
blockRangeBins, err = utils.GetBlockHeightBins(startingBlock, endingBlock, batchSize)
Expect(err).ToNot(HaveOccurred())
Expect(len(blockRangeBins)).To(Equal(1))
Expect(blockRangeBins[0]).To(Equal([]uint64{1}))
})
It("throws an error if the starting block is higher than the ending block", func() {

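For context, the spec above pins down the binning behavior, including the new single-block case. A minimal sketch of a helper consistent with these assertions (illustrative only; the real utils.GetBlockHeightBins lives in a file that is not part of this diff):

package utils

import "fmt"

// GetBlockHeightBins splits the inclusive range [start, end] into consecutive
// bins of at most batchSize heights each; sketched here to match the tests above
func GetBlockHeightBins(start, end, batchSize uint64) ([][]uint64, error) {
	if end < start {
		return nil, fmt.Errorf("ending block number needs to be greater than starting block number")
	}
	if batchSize == 0 {
		return nil, fmt.Errorf("batchSize needs to be greater than zero")
	}
	bins := make([][]uint64, 0)
	for height := start; height <= end; {
		bin := make([]uint64, 0, batchSize)
		for uint64(len(bin)) < batchSize && height <= end {
			bin = append(bin, height)
			height++
		}
		bins = append(bins, bin)
	}
	return bins, nil
}

With start == end == 1 and batchSize = 100 this returns a single bin []uint64{1}, matching the new assertion.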
View File

@@ -63,6 +63,8 @@ type BackFillService struct {
QuitChan chan bool
// Chain type
chain shared.ChainType
// Headers with times_validated lower than this will be resynced
validationLevel int
}
// NewBackFillService returns a new BackFillInterface
@@ -107,6 +109,7 @@ func NewBackFillService(settings *Config, screenAndServeChan chan shared.Convert
ScreenAndServeChan: screenAndServeChan,
QuitChan: settings.Quit,
chain: settings.Chain,
validationLevel: settings.ValidationLevel,
}, nil
}
@@ -135,7 +138,7 @@ func (bfs *BackFillService) FillGapsInSuperNode(wg *sync.WaitGroup) {
log.Error(err)
}
}
gaps, err := bfs.Retriever.RetrieveGapsInData()
gaps, err := bfs.Retriever.RetrieveGapsInData(bfs.validationLevel)
if err != nil {
log.Errorf("super node db backfill RetrieveGapsInData error for chain %s: %v", bfs.chain.String(), err)
continue
@@ -158,7 +161,6 @@ func (bfs *BackFillService) backFill(startingBlock, endingBlock uint64) error {
if endingBlock < startingBlock {
return fmt.Errorf("super node %s db backfill: ending block number needs to be greater than starting block number", bfs.chain.String())
}
//
// break the range up into bins of smaller ranges
blockRangeBins, err := utils.GetBlockHeightBins(startingBlock, endingBlock, bfs.BatchSize)
if err != nil {

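For orientation: the flag descriptions earlier in this commit say batchSize is the data-fetching batch size and batchNumber is the number of goroutines fetching data concurrently. A simplified, hypothetical sketch of that dispatch pattern for one retrieved gap (Gap stands in for shared.Gap; dispatchGap and fetch are illustrative names, not the service's actual methods):

package main

import (
	"fmt"
	"sync"
)

// Gap stands in for shared.Gap in this sketch
type Gap struct {
	Start, Stop uint64
}

// dispatchGap chunks a gap into bins of at most batchSize heights and feeds
// them to batchNumber concurrent workers, mirroring the batch settings above
func dispatchGap(gap Gap, batchSize, batchNumber uint64, fetch func([]uint64)) {
	jobs := make(chan []uint64)
	var wg sync.WaitGroup
	for i := uint64(0); i < batchNumber; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for bin := range jobs {
				fetch(bin) // fetch, convert, and publish this bin of heights
			}
		}()
	}
	bin := make([]uint64, 0, batchSize)
	for height := gap.Start; height <= gap.Stop; height++ {
		bin = append(bin, height)
		if uint64(len(bin)) == batchSize {
			jobs <- bin
			bin = make([]uint64, 0, batchSize)
		}
	}
	if len(bin) > 0 {
		jobs <- bin
	}
	close(jobs)
	wg.Wait()
}

func main() {
	// e.g. a gap of ten blocks, batchSize 3, two workers
	dispatchGap(Gap{Start: 1, Stop: 10}, 3, 2, func(bin []uint64) {
		fmt.Println(bin)
	})
}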
View File

@@ -161,7 +161,7 @@ func (ecr *CIDRetriever) RetrieveTxCIDs(tx *sqlx.Tx, txFilter TxFilter, headerID
}
// RetrieveGapsInData is used to find the block numbers at which we are missing data in the db
func (ecr *CIDRetriever) RetrieveGapsInData() ([]shared.Gap, error) {
func (ecr *CIDRetriever) RetrieveGapsInData(validationLevel int) ([]shared.Gap, error) {
pgStr := `SELECT header_cids.block_number + 1 AS start, min(fr.block_number) - 1 AS stop FROM btc.header_cids
LEFT JOIN btc.header_cids r on btc.header_cids.block_number = r.block_number - 1
LEFT JOIN btc.header_cids fr on btc.header_cids.block_number < fr.block_number
@@ -171,18 +171,45 @@ func (ecr *CIDRetriever) RetrieveGapsInData() ([]shared.Gap, error) {
Start uint64 `db:"start"`
Stop uint64 `db:"stop"`
}, 0)
err := ecr.db.Select(&results, pgStr)
if err != nil {
if err := ecr.db.Select(&results, pgStr); err != nil {
return nil, err
}
gaps := make([]shared.Gap, len(results))
emptyGaps := make([]shared.Gap, len(results))
for i, res := range results {
gaps[i] = shared.Gap{
emptyGaps[i] = shared.Gap{
Start: res.Start,
Stop: res.Stop,
}
}
return gaps, nil
// Find sections of blocks where we are below the validation level
// There will be no overlap between these "gaps" and the ones above
pgStr = `SELECT block_number FROM btc.header_cids
WHERE times_validated < $1
ORDER BY block_number`
var heights []uint64
if err := ecr.db.Select(&heights, pgStr, validationLevel); err != nil {
return nil, err
}
if len(heights) == 0 {
return emptyGaps, nil
}
validationGaps := make([]shared.Gap, 0)
start := heights[0]
lastHeight := start
for _, height := range heights[1:] {
if height == lastHeight+1 {
lastHeight = height
continue
}
validationGaps = append(validationGaps, shared.Gap{
Start: start,
Stop: lastHeight,
})
start = height
lastHeight = start
}
// append the final run of under-validated heights as its own gap
validationGaps = append(validationGaps, shared.Gap{
Start: start,
Stop: lastHeight,
})
return append(emptyGaps, validationGaps...), nil
}
// RetrieveBlockByHash returns all of the CIDs needed to compose an entire block, for a given block hash

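The grouping loop above collapses consecutive under-validated heights into inclusive ranges, and the same logic is repeated for the eth retriever below. A small standalone illustration of that behavior (Gap and groupContiguous are stand-ins for the project types, not code from this commit):

package main

import "fmt"

type Gap struct {
	Start, Stop uint64
}

// groupContiguous collapses a sorted list of block heights into inclusive
// ranges, the same way RetrieveGapsInData groups under-validated headers
func groupContiguous(heights []uint64) []Gap {
	if len(heights) == 0 {
		return nil
	}
	gaps := make([]Gap, 0)
	start := heights[0]
	lastHeight := start
	for _, height := range heights[1:] {
		if height == lastHeight+1 {
			lastHeight = height
			continue
		}
		gaps = append(gaps, Gap{Start: start, Stop: lastHeight})
		start = height
		lastHeight = start
	}
	return append(gaps, Gap{Start: start, Stop: lastHeight})
}

func main() {
	// heights whose times_validated is below the threshold: 3-5, 9-10, and 13
	fmt.Println(groupContiguous([]uint64{3, 4, 5, 9, 10, 13}))
	// prints [{3 5} {9 10} {13 13}]
}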
View File

@@ -44,6 +44,7 @@ const (
SUPERNODE_FREQUENCY = "SUPERNODE_FREQUENCY"
SUPERNODE_BATCH_SIZE = "SUPERNODE_BATCH_SIZE"
SUPERNODE_BATCH_NUMBER = "SUPERNODE_BATCH_NUMBER"
SUPERNODE_VALIDATION_LEVEL = "SUPERNODE_VALIDATION_LEVEL"
)
// Config struct
@@ -70,6 +71,7 @@ type Config struct {
Frequency time.Duration
BatchSize uint64
BatchNumber uint64
ValidationLevel int
}
// NewSuperNodeConfig is used to initialize a SuperNode config from a .toml file
@@ -167,6 +169,7 @@ func (c *Config) BackFillFields() error {
viper.BindEnv("superNode.frequency", SUPERNODE_FREQUENCY)
viper.BindEnv("superNode.batchSize", SUPERNODE_BATCH_SIZE)
viper.BindEnv("superNode.batchNumber", SUPERNODE_BATCH_NUMBER)
viper.BindEnv("superNode.validationLevel", SUPERNODE_VALIDATION_LEVEL)
switch c.Chain {
case shared.Ethereum:
@@ -190,5 +193,6 @@ func (c *Config) BackFillFields() error {
c.Frequency = frequency
c.BatchSize = uint64(viper.GetInt64("superNode.batchSize"))
c.BatchNumber = uint64(viper.GetInt64("superNode.batchNumber"))
c.ValidationLevel = viper.GetInt("superNode.validationLevel")
return nil
}

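With the bindings above, the validation level can come from the --supernode-validation-level flag, the superNode.validationLevel TOML key, or the $SUPERNODE_VALIDATION_LEVEL environment variable, resolved by viper in its usual precedence order. A tiny, illustrative demonstration of the environment-variable path:

package main

import (
	"fmt"
	"os"

	"github.com/spf13/viper"
)

func main() {
	// bind the config key to the environment variable, as BackFillFields does
	viper.BindEnv("superNode.validationLevel", "SUPERNODE_VALIDATION_LEVEL")
	os.Setenv("SUPERNODE_VALIDATION_LEVEL", "2")
	fmt.Println(viper.GetInt("superNode.validationLevel")) // prints 2
}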
View File

@@ -445,7 +445,8 @@ func (ecr *CIDRetriever) RetrieveStorageCIDs(tx *sqlx.Tx, storageFilter StorageF
}
// RetrieveGapsInData is used to find the block numbers at which we are missing data in the db
func (ecr *CIDRetriever) RetrieveGapsInData() ([]shared.Gap, error) {
// it finds the union of heights where no data exists and heights where times_validated is lower than the validation level
func (ecr *CIDRetriever) RetrieveGapsInData(validationLevel int) ([]shared.Gap, error) {
pgStr := `SELECT header_cids.block_number + 1 AS start, min(fr.block_number) - 1 AS stop FROM eth.header_cids
LEFT JOIN eth.header_cids r on eth.header_cids.block_number = r.block_number - 1
LEFT JOIN eth.header_cids fr on eth.header_cids.block_number < fr.block_number
@@ -455,18 +456,45 @@ func (ecr *CIDRetriever) RetrieveGapsInData() ([]shared.Gap, error) {
Start uint64 `db:"start"`
Stop uint64 `db:"stop"`
}, 0)
err := ecr.db.Select(&results, pgStr)
if err != nil {
if err := ecr.db.Select(&results, pgStr); err != nil {
return nil, err
}
gaps := make([]shared.Gap, len(results))
emptyGaps := make([]shared.Gap, len(results))
for i, res := range results {
gaps[i] = shared.Gap{
emptyGaps[i] = shared.Gap{
Start: res.Start,
Stop: res.Stop,
}
}
return gaps, nil
// Find sections of blocks where we are below the validation level
// There will be no overlap between these "gaps" and the ones above
pgStr = `SELECT block_number FROM eth.header_cids
WHERE times_validated < $1
ORDER BY block_number`
var heights []uint64
if err := ecr.db.Select(&heights, pgStr, validationLevel); err != nil {
return nil, err
}
if len(heights) == 0 {
return emptyGaps, nil
}
validationGaps := make([]shared.Gap, 0)
start := heights[0]
lastHeight := start
for _, height := range heights[1:] {
if height == lastHeight+1 {
lastHeight = height
continue
}
validationGaps = append(validationGaps, shared.Gap{
Start: start,
Stop: lastHeight,
})
start = height
lastHeight = start
}
// append the final run of under-validated heights as its own gap
validationGaps = append(validationGaps, shared.Gap{
Start: start,
Stop: lastHeight,
})
return append(emptyGaps, validationGaps...), nil
}
// RetrieveBlockByHash returns all of the CIDs needed to compose an entire block, for a given block hash

View File

@@ -484,7 +484,7 @@ var _ = Describe("Retriever", func() {
Expect(err).ToNot(HaveOccurred())
err = repo.Index(&payload2)
Expect(err).ToNot(HaveOccurred())
gaps, err := retriever.RetrieveGapsInData()
gaps, err := retriever.RetrieveGapsInData(1)
Expect(err).ToNot(HaveOccurred())
Expect(len(gaps)).To(Equal(0))
})
@@ -494,11 +494,29 @@ var _ = Describe("Retriever", func() {
payload.HeaderCID.BlockNumber = "5"
err := repo.Index(&payload)
Expect(err).ToNot(HaveOccurred())
gaps, err := retriever.RetrieveGapsInData()
gaps, err := retriever.RetrieveGapsInData(1)
Expect(err).ToNot(HaveOccurred())
Expect(len(gaps)).To(Equal(0))
})
It("Can handle single block gaps", func() {
payload1 := *mocks.MockCIDPayload
payload1.HeaderCID.BlockNumber = "2"
payload2 := payload1
payload2.HeaderCID.BlockNumber = "4"
err := repo.Index(mocks.MockCIDPayload)
Expect(err).ToNot(HaveOccurred())
err = repo.Index(&payload1)
Expect(err).ToNot(HaveOccurred())
err = repo.Index(&payload2)
Expect(err).ToNot(HaveOccurred())
gaps, err := retriever.RetrieveGapsInData(1)
Expect(err).ToNot(HaveOccurred())
Expect(len(gaps)).To(Equal(1))
Expect(gaps[0].Start).To(Equal(uint64(3)))
Expect(gaps[0].Stop).To(Equal(uint64(3)))
})
It("Finds gap between two entries", func() {
payload1 := *mocks.MockCIDPayload
payload1.HeaderCID.BlockNumber = "1010101"
@@ -508,7 +526,7 @@ var _ = Describe("Retriever", func() {
Expect(err).ToNot(HaveOccurred())
err = repo.Index(&payload2)
Expect(err).ToNot(HaveOccurred())
gaps, err := retriever.RetrieveGapsInData()
gaps, err := retriever.RetrieveGapsInData(1)
Expect(err).ToNot(HaveOccurred())
Expect(len(gaps)).To(Equal(1))
Expect(gaps[0].Start).To(Equal(uint64(6)))
@@ -540,7 +558,7 @@ var _ = Describe("Retriever", func() {
Expect(err).ToNot(HaveOccurred())
err = repo.Index(&payload6)
Expect(err).ToNot(HaveOccurred())
gaps, err := retriever.RetrieveGapsInData()
gaps, err := retriever.RetrieveGapsInData(1)
Expect(err).ToNot(HaveOccurred())
Expect(len(gaps)).To(Equal(3))
Expect(shared.ListContainsGap(gaps, shared.Gap{Start: 6, Stop: 99})).To(BeTrue())

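None of the tests above pass a validation level higher than 1, so the times_validated branch of RetrieveGapsInData is not exercised here. A hypothetical addition along these lines could cover it, assuming repo.Index records a freshly indexed header with times_validated = 1 (that bookkeeping sits in the suppressed diff, so the exact count is an assumption):

It("Reports already-indexed heights as gaps when they are below the validation level", func() {
	err := repo.Index(mocks.MockCIDPayload)
	Expect(err).ToNot(HaveOccurred())
	gaps, err := retriever.RetrieveGapsInData(2)
	Expect(err).ToNot(HaveOccurred())
	Expect(len(gaps)).To(Equal(1))
})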
View File

@@ -57,7 +57,7 @@ type CIDRetriever interface {
Retrieve(filter SubscriptionSettings, blockNumber int64) ([]CIDsForFetching, bool, error)
RetrieveFirstBlockNumber() (int64, error)
RetrieveLastBlockNumber() (int64, error)
RetrieveGapsInData() ([]Gap, error)
RetrieveGapsInData(validationLevel int) ([]Gap, error)
}
// IPLDFetcher uses a CID wrapper to fetch an IPLD wrapper

View File

@@ -46,7 +46,7 @@ func (mcr *CIDRetriever) RetrieveFirstBlockNumber() (int64, error) {
}
// RetrieveGapsInData mock method
func (mcr *CIDRetriever) RetrieveGapsInData() ([]shared.Gap, error) {
func (mcr *CIDRetriever) RetrieveGapsInData(int) ([]shared.Gap, error) {
mcr.CalledTimes++
return mcr.GapsToRetrieve, mcr.GapsToRetrieveErr
}