ipld-eth-state-snapshot/pkg/snapshot/util.go

package snapshot

import (
	"bytes"
	"context"
	"fmt"

	"github.com/cerc-io/ipld-eth-state-snapshot/pkg/prom"
	file "github.com/cerc-io/ipld-eth-state-snapshot/pkg/snapshot/file"
	"github.com/cerc-io/ipld-eth-state-snapshot/pkg/snapshot/pg"
	snapt "github.com/cerc-io/ipld-eth-state-snapshot/pkg/types"
	"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"
)

// NewPublisher builds the snapt.Publisher that corresponds to the requested snapshot mode.
//
// For FileSnapshot it delegates to file.NewPublisher with the configured output directory and
// node info. For PgSnapshot it opens a PGX driver from the DB connection config, registers that
// driver with the prometheus DB collector under the configured database name, and wraps it in a
// pg publisher. Any other mode is rejected with an error.
func NewPublisher(mode SnapshotMode, config *Config) (snapt.Publisher, error) {
	if mode == FileSnapshot {
		return file.NewPublisher(config.File.OutputDir, config.Eth.NodeInfo)
	}
	if mode != PgSnapshot {
		return nil, fmt.Errorf("invalid snapshot mode: %s", mode)
	}

	driver, err := postgres.NewPGXDriver(context.Background(), config.DB.ConnConfig, config.Eth.NodeInfo)
	if err != nil {
		return nil, err
	}

	// Register the driver with the metrics collector before handing it to the publisher.
	prom.RegisterDBCollector(config.DB.ConnConfig.DatabaseName, driver)

	db := postgres.NewPostgresDB(driver, false)
	return pg.NewPublisher(db), nil
}

// decrementPath subtracts 1 from the path in place, treating it as a sequence of base-16
// digits with the least significant digit last. Zero digits that have to borrow become 0xf.
// It returns false and leaves the slice untouched when every element is zero (this includes
// an empty or nil path); otherwise it returns true.
func decrementPath(path []byte) bool {
	// Find the rightmost non-zero element: it is the one that absorbs the borrow.
	pivot := len(path) - 1
	for pivot >= 0 && path[pivot] == 0 {
		pivot--
	}
	if pivot < 0 {
		// Nothing but zeros, so there is nothing to decrement.
		return false
	}

	path[pivot]--
	// Every zero to the right of the pivot borrowed from its left neighbour.
	for j := pivot + 1; j < len(path); j++ {
		path[j] = 0xf
	}
	return true
}

// estimateSteps estimates the number of iterations necessary to step from start to end when
// both paths are compared at the given depth (number of nibbles).
//
// Both paths are first normalized to exactly depth elements: shorter paths are right-padded with
// zeros and longer ones are truncated. An empty or nil start is treated as 0x0 and an empty or
// nil end as 0x10, i.e. the extremes of the range. The result is the base-16 difference
// end - start, or 0 when start is not strictly before end.
func estimateSteps(start []byte, end []byte, depth int) uint64 {
	// We see paths in several forms (nil, 0600, 06, etc.). We need to adjust them to a comparable form.
	// For nil, start and end indicate the extremes of 0x0 and 0x10.  For differences in depth, we often see a
	// start/end range on a bounded iterator specified like 0500:0600, while the value returned by it.Path() may
	// be shorter, like 06.  Since our goal is to estimate how many steps it would take to move from start to end,
	// we want to perform the comparison at a stable depth, since to move from 05 to 06 is only 1 step, but
	// to move from 0500:06 is 16.
	normalizePathRange := func(start []byte, end []byte, depth int) ([]byte, []byte) {
		if len(start) == 0 {
			start = []byte{0x0}
		}
		if len(end) == 0 {
			end = []byte{0x10}
		}
		normalizedStart := make([]byte, depth)
		normalizedEnd := make([]byte, depth)
		// copy stops at the shorter of the two slices, which pads with zeros or truncates as needed.
		copy(normalizedStart, start)
		copy(normalizedEnd, end)
		return normalizedStart, normalizedEnd
	}

	// Fix the paths.
	start, end = normalizePathRange(start, end, depth)

	// No negative distances, if the start is already >= end, the distance is 0.
	if bytes.Compare(start, end) >= 0 {
		return 0
	}

	// Subtract each component, right to left, borrowing from the next digit if necessary.
	difference := make([]byte, len(start))
	var carry byte
	for i := len(start) - 1; i >= 0; i-- {
		result := end[i] - start[i] - carry
		// A byte underflow shows up as a value above 0xf; keep the low nibble and borrow.
		// The leading digit never borrows, which also lets it hold the 0x10 of a nil end.
		if result > 0xf && i > 0 {
			result &= 0xf
			carry = 1
		} else {
			carry = 0
		}
		difference[i] = result
	}

	// Fold the digits, most significant first, into a single base-16 number. Accumulating this
	// way weights digit i by 16^(len-i-1) without needing a separate power helper.
	var ret uint64
	for _, digit := range difference {
		ret = ret*16 + uint64(digit)
	}

	return ret
}

// keybytesToHex expands each input byte into two nibbles (high nibble first) and appends the
// value 16 as a final element, so the result has 2*len(str)+1 elements.
// https://github.com/ethereum/go-ethereum/blob/master/trie/encoding.go#L97
func keybytesToHex(str []byte) []byte {
	out := make([]byte, 0, 2*len(str)+1)
	for _, b := range str {
		out = append(out, b>>4, b&0x0f)
	}
	// Trailing marker element.
	return append(out, 16)
}

// updateSeekedPath overwrites *seekedPath with the contents of nodePath.
//
// The existing backing array of *seekedPath is reused whenever its capacity is large enough,
// so callers that pre-allocate the buffer see no new allocation. If nodePath is longer than the
// available capacity the slice is grown instead of panicking on an out-of-range reslice.
func updateSeekedPath(seekedPath *[]byte, nodePath []byte) {
	// Truncate to length zero, then append: this copies in place when capacity allows.
	*seekedPath = append((*seekedPath)[:0], nodePath...)
}

// checkUpperPathBound reports whether nodePath lies within the upper bound given by endPath.
// A nil endPath means there is no bound. For an even-length endPath the bound is exclusive,
// since the node at endPath will be covered by the next iterator; for an odd-length endPath
// the bound is inclusive.
func checkUpperPathBound(nodePath, endPath []byte) bool {
	// every path is before nil endPath
	if endPath == nil {
		return true
	}

	switch cmp := bytes.Compare(nodePath, endPath); {
	case cmp < 0:
		// strictly before the end path: always in range
		return true
	case cmp == 0:
		// sitting exactly on the end path: only allowed for the closed (odd length) interval
		return len(endPath)%2 != 0
	default:
		return false
	}
}

// max returns the larger of the two ints.
func max(a int, b int) int {
	if b >= a {
		return b
	}
	return a
}
create csv file publisher; update geth and schema 2022-02-09 15:19:10 +00:00			`package snapshot`

			`import (`
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com> 2022-08-03 11:35:04 +00:00			`"bytes"`
create csv file publisher; update geth and schema 2022-02-09 15:19:10 +00:00			`"context"`
			`"fmt"`

Cerc refactor (#64) * cerc refactor waiting on unpublished dependencies * more cerc_refactor with published ipld-eth-db unstable * TearDownDB refactor * missed second publisher_test TearDownDB 2022-09-20 17:47:34 +00:00			`"github.com/cerc-io/ipld-eth-state-snapshot/pkg/prom"`
			`file "github.com/cerc-io/ipld-eth-state-snapshot/pkg/snapshot/file"`
update service 2023-04-12 18:07:42 +00:00			`"github.com/cerc-io/ipld-eth-state-snapshot/pkg/snapshot/pg"`
Cerc refactor (#64) * cerc refactor waiting on unpublished dependencies * more cerc_refactor with published ipld-eth-db unstable * TearDownDB refactor * missed second publisher_test TearDownDB 2022-09-20 17:47:34 +00:00			`snapt "github.com/cerc-io/ipld-eth-state-snapshot/pkg/types"`
Add progress counter to prometheus output (#76) * Add a progress counter by checking the distance already traversed from the startPath to endPath in a bounded iterator vs the estimated number of iterations. 2023-05-23 16:23:58 +00:00			`"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"`
create csv file publisher; update geth and schema 2022-02-09 15:19:10 +00:00			`)`

			`func NewPublisher(mode SnapshotMode, config *Config) (snapt.Publisher, error) {`
			`switch mode {`
			`case PgSnapshot:`
			`driver, err := postgres.NewPGXDriver(context.Background(), config.DB.ConnConfig, config.Eth.NodeInfo)`
			`if err != nil {`
			`return nil, err`
			`}`
Add prometheus metrics collection (#33) * Upgrade geth * Add prometheus metrics collection * Update README 2022-05-23 11:26:48 +00:00
			`prom.RegisterDBCollector(config.DB.ConnConfig.DatabaseName, driver)`

refactor to work with v4 vdb geth v1.11.5 2023-03-31 15:39:27 +00:00			`return pg.NewPublisher(postgres.NewPostgresDB(driver, false)), nil`
create csv file publisher; update geth and schema 2022-02-09 15:19:10 +00:00			`case FileSnapshot:`
			`return file.NewPublisher(config.File.OutputDir, config.Eth.NodeInfo)`
			`}`
			`return nil, fmt.Errorf("invalid snapshot mode: %s", mode)`
			`}`
decrement path from restored iterator to cover node gaps 2022-06-06 12:08:38 +00:00
			`// Subtracts 1 from the last byte in a path slice, carrying if needed.`
			`// Does nothing, returning false, for all-zero inputs.`
			`func decrementPath(path []byte) bool {`
			`// check for all zeros`
			`allzero := true`
			`for i := 0; i < len(path); i++ {`
			`allzero = allzero && path[i] == 0`
			`}`
			`if allzero {`
			`return false`
			`}`
			`for i := len(path) - 1; i >= 0; i-- {`
			`val := path[i]`
			`path[i]--`
			`if val == 0 {`
			`path[i] = 0xf`
			`} else {`
			`return true`
			`}`
			`}`
			`return true`
			`}`
Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com> 2022-08-03 11:35:04 +00:00
Add progress counter to prometheus output (#76) * Add a progress counter by checking the distance already traversed from the startPath to endPath in a bounded iterator vs the estimated number of iterations. 2023-05-23 16:23:58 +00:00			`// Estimate the number of iterations necessary to step from start to end.`
			`func estimateSteps(start []byte, end []byte, depth int) uint64 {`
			`// We see paths in several forms (nil, 0600, 06, etc.). We need to adjust them to a comparable form.`
			`// For nil, start and end indicate the extremes of 0x0 and 0x10. For differences in depth, we often see a`
			`// start/end range on a bounded iterator specified like 0500:0600, while the value returned by it.Path() may`
			`// be shorter, like 06. Since our goal is to estimate how many steps it would take to move from start to end,`
			`// we want to perform the comparison at a stable depth, since to move from 05 to 06 is only 1 step, but`
			`// to move from 0500:06 is 16.`
			`normalizePathRange := func(start []byte, end []byte, depth int) ([]byte, []byte) {`
			`if 0 == len(start) {`
			`start = []byte{0x0}`
			`}`
			`if 0 == len(end) {`
			`end = []byte{0x10}`
			`}`
			`normalizedStart := make([]byte, depth)`
			`normalizedEnd := make([]byte, depth)`
			`for i := 0; i < depth; i++ {`
			`if i < len(start) {`
			`normalizedStart[i] = start[i]`
			`}`
			`if i < len(end) {`
			`normalizedEnd[i] = end[i]`
			`}`
			`}`
			`return normalizedStart, normalizedEnd`
			`}`

			`// We have no need to handle negative exponents, so uints are fine.`
			`pow := func(x uint64, y uint) uint64 {`
			`if 0 == y {`
			`return 1`
			`}`
			`ret := x`
			`for i := uint(0); i < y; i++ {`
			`ret *= x`
			`}`
			`return x`
			`}`

			`// Fix the paths.`
			`start, end = normalizePathRange(start, end, depth)`

			`// No negative distances, if the start is already >= end, the distance is 0.`
			`if bytes.Compare(start, end) >= 0 {`
			`return 0`
			`}`

			`// Subtract each component, right to left, carrying over if necessary.`
			`difference := make([]byte, len(start))`
			`var carry byte = 0`
			`for i := len(start) - 1; i >= 0; i-- {`
			`result := end[i] - start[i] - carry`
			`if result > 0xf && i > 0 {`
			`result &= 0xf`
			`carry = 1`
			`} else {`
			`carry = 0`
			`}`
			`difference[i] = result`
			`}`

			`// Calculate the result.`
			`var ret uint64 = 0`
			`for i := 0; i < len(difference); i++ {`
			`ret += uint64(difference[i]) * pow(16, uint(len(difference)-i-1))`
			`}`

			`return ret`
			`}`

Account selective snapshot (#46) * snapshotter ignores nodes not along a path along those derived from a list of account addresses if one is provided * config and env updates * cmd update * Encode watched address path bytes to hex for comparison * actually ignore the subtries that are not along the paths of interest * Fixes for account selective snapshot * Use non-concurrent iterator when having a single worker * Only index root node when starting path of an iterator is nil * Upgrade deps * Avoid tracking iterators and skip recovery test * Fix recovery mechanism, use sync Map instead of buffered channels * Add test for account selective snapshot * Continue traversal with concurrent iterators with starting path nil * Use errgroup to simplify error handling with concurrent iterators * Check if all the nodes are indexed in the recovery test * Use concurrency safe sync Map in account selective snapshot test * Only track concurrent iterators and refactor code * Fix node and recovered path comparison * Revert back to using buffered channels for tracking iterators * Add a metric to monitor number of active iterators * Update docs * Update seeked path after node is processed * Return error on context cancellation from subtrie iteration * Add tests for account selective snapshot recovery * Explicity enforce concurrent iterator bounds to avoid duplicate nodes * Update full snapshot test to check nodes being indexed * Refactor code to simplify snapshot logic * Remove unnecessary function argument * Use ctx cancellation for handling signals * Add descriptive comments Co-authored-by: prathamesh0 <prathamesh.musale0@gmail.com> 2022-08-03 11:35:04 +00:00			`// https://github.com/ethereum/go-ethereum/blob/master/trie/encoding.go#L97`
			`func keybytesToHex(str []byte) []byte {`
			`l := len(str)*2 + 1`
			`var nibbles = make([]byte, l)`
			`for i, b := range str {`
			`nibbles[i*2] = b / 16`
			`nibbles[i*2+1] = b % 16`
			`}`
			`nibbles[l-1] = 16`
			`return nibbles`
			`}`

			`func updateSeekedPath(seekedPath *[]byte, nodePath []byte) {`
			`// assumes len(nodePath) <= max len(*seekedPath)`
			`*seekedPath = (*seekedPath)[:len(nodePath)]`
			`copy(*seekedPath, nodePath)`
			`}`

			`// checks that the provided node path is before the end path`
			`func checkUpperPathBound(nodePath, endPath []byte) bool {`
			`// every path is before nil endPath`
			`if endPath == nil {`
			`return true`
			`}`

			`if len(endPath)%2 == 0 {`
			`// in case of even length endpath`
			`// apply open interval filter since the node at endpath will be covered by the next iterator`
			`return bytes.Compare(nodePath, endPath) < 0`
			`}`

			`return bytes.Compare(nodePath, endPath) <= 0`
			`}`
Add progress counter to prometheus output (#76) * Add a progress counter by checking the distance already traversed from the startPath to endPath in a bounded iterator vs the estimated number of iterations. 2023-05-23 16:23:58 +00:00
			`func max(a int, b int) int {`
			`if a > b {`
			`return a`
			`}`

			`return b`
			`}`