feat(iavl): initialize disk layout (#25624)

This commit is contained in:
Aaron Craelius 2025-12-03 11:21:22 -05:00 committed by GitHub
parent 29314c9992
commit 65f680946c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 278 additions and 0 deletions

View File

@ -0,0 +1,64 @@
package internal
import (
"fmt"
"unsafe"
)
// sizeBranch is the expected size in bytes of a BranchLayout value.
const sizeBranch = 76

// init guards the on-disk format: it panics during package initialization
// if the compiled size of BranchLayout differs from sizeBranch.
func init() {
	got := unsafe.Sizeof(BranchLayout{})
	if got == sizeBranch {
		return
	}
	panic(fmt.Sprintf("invalid BranchLayout size: got %d, want %d", got, sizeBranch))
}
// BranchLayout is the on-disk layout of a branch node.
// Its in-memory size is checked against sizeBranch (76 bytes) when the package is initialized.
// NOTE: changes to this struct will affect on-disk compatibility.
type BranchLayout struct {
// ID is the NodeID of this branch node.
ID NodeID
// Left is the NodeID of the left child node.
Left NodeID
// Right is the NodeID of the right child node.
Right NodeID
// NOTE: Left and right offsets are included for performance and take up an extra 8 bytes of storage for each branch node.
// In an alternate design we stored only NodeID or offset for left and right depending on whether they are local
// to this changeset or in a different changeset.
// This saved 8 bytes of storage per branch node but made the implementation significantly more complex.
// For now, we are including both the left and right IDs and offsets, but if storage space becomes a problem
// we can revisit the earlier design and have an 8-byte NodeIDOrOffset type for Left and Right.
//
// LeftOffset is the 1-based offset of the left child node if it is in this changeset, 0 otherwise.
// The Left NodeID will indicate whether this is a branch or leaf node.
LeftOffset uint32
// RightOffset is the 1-based offset of the right child node if it is in this changeset, 0 otherwise.
// The Right NodeID will indicate whether this is a branch or leaf node.
RightOffset uint32
// KeyOffset is the offset of the key data for this node in the key value data file.
// NOTE: a 32-bit offset means that the key data file can be at most 4GB in size.
// This doesn't limit the size of the overall tree, it just limits the size of individual key/value data files.
// If we want to support larger key/value data files in the future, we can change this to a 40-bit offset;
// spare padding bytes already exist in this struct for this purpose (see the padding note below).
KeyOffset uint32
// Height is the height of this branch node in the tree.
Height uint8
// NOTE: the declared fields total 74 bytes, so the 76-byte struct contains two bytes of padding that could be
// used for something else in the future if needed, such as an extra byte to allow for 40-bit key offsets.
// Because Size and Hash are byte arrays with 1-byte alignment, the standard Go compiler is expected to place
// that padding after Hash at the end of the struct rather than between Height and Size.
//
// Size is the number of leaf nodes in the subtree rooted at this branch node.
Size Uint40
// Hash is the hash of this branch node.
Hash [32]byte
}

View File

@ -0,0 +1,36 @@
package internal
import (
"fmt"
"unsafe"
)
// sizeLeaf is the expected size in bytes of a LeafLayout value.
const sizeLeaf = 44

// init guards the on-disk format: it panics during package initialization
// if the compiled size of LeafLayout differs from sizeLeaf.
func init() {
	got := unsafe.Sizeof(LeafLayout{})
	if got == sizeLeaf {
		return
	}
	panic(fmt.Sprintf("invalid LeafLayout size: got %d, want %d", got, sizeLeaf))
}
// LeafLayout is the on-disk layout of a leaf node.
// Its in-memory size is checked against sizeLeaf (44 bytes) when the package is initialized.
// NOTE: changes to this struct will affect on-disk compatibility.
type LeafLayout struct {
// ID is the NodeID of this leaf node.
ID NodeID
// KeyOffset is the offset of the key data for this node in the key value data file.
// NOTE: a 32-bit offset means that the key data file can be at most 4GB in size.
// If we want to support larger key/value data files in the future, we can change this to a 40-bit offset.
// However, this would require changing the size of this struct from 44 bytes to 48 bytes which would break
// on-disk compatibility.
// Such an upgrade could be made by introducing a "wide changeset" format that lives alongside
// this existing "compact" format.
KeyOffset uint32
// Hash is the hash of this leaf node.
Hash [32]byte
}

62
iavl/internal/node_id.go Normal file
View File

@ -0,0 +1,62 @@
package internal
import "fmt"
// NodeID is a stable identifier for a node in the IAVL tree.
// It consists of two 32-bit fields: a 32-bit version, and a NodeFlagIndex that packs
// a 31-bit index within that version together with 1 bit indicating whether the node
// is a leaf or a branch.
// A 32-bit version should allow for 136 years of 1-second blocks.
// If block production significantly speeds up, we can increase the width of the version field in the future.
// This sort of change can be done without any major on-disk migration because we can simply create a "wide changeset"
// format that lives alongside the existing "compact" format.
// Because the cost of migration is low, we have decided to keep things simple and compact for now.
type NodeID struct {
// Version is the version of the tree at which this node was created.
Version uint32
// FlagIndex indicates whether this is a branch or leaf node and stores its index in the tree.
FlagIndex NodeFlagIndex
}
// NodeFlagIndex is the index of an IAVL node in the tree plus a flag indicating whether this is a branch or leaf node.
// Bits 0-30 hold the index; bit 31 holds the flag (0 for branch, 1 for leaf).
// For leaf nodes, the index value is the 1-based in-order index of the leaf node with reference to other leaf nodes in this version.
// For branch nodes, the index value is the 1-based post-order traversal index of the node within this version.
type NodeFlagIndex uint32
// NewNodeID builds the NodeID for a node created at the given version.
// isLeaf and index are packed into the FlagIndex field via NewNodeFlagIndex.
func NewNodeID(isLeaf bool, version, index uint32) NodeID {
	var id NodeID
	id.Version = version
	id.FlagIndex = NewNodeFlagIndex(isLeaf, index)
	return id
}
// IsLeaf reports whether this NodeID identifies a leaf node, as recorded in
// the flag carried by its FlagIndex field.
func (id NodeID) IsLeaf() bool {
	flagIndex := id.FlagIndex
	return flagIndex.IsLeaf()
}
// String renders the NodeID for humans, showing the leaf flag, the version
// and the index with the flag bit stripped.
func (id NodeID) String() string {
	leaf, version, index := id.IsLeaf(), id.Version, id.FlagIndex.Index()
	return fmt.Sprintf("NodeID{leaf:%t, version:%d, index:%d}", leaf, version, index)
}
// NewNodeFlagIndex packs a node index and the leaf/branch flag into a NodeFlagIndex.
// The index occupies bits 0-30 and bit 31 is set when isLeaf is true.
//
// It panics if index does not fit in 31 bits: such a value would otherwise
// silently overwrite the leaf flag and produce an identifier for a different node.
func NewNodeFlagIndex(isLeaf bool, index uint32) NodeFlagIndex {
	const leafFlag = 1 << 31
	if index&leafFlag != 0 {
		panic(fmt.Sprintf("index %d overflows the 31-bit node index", index))
	}
	idx := NodeFlagIndex(index)
	if isLeaf {
		idx |= leafFlag
	}
	return idx
}
// IsLeaf reports whether the flag in bit 31 marks this value as a leaf node.
func (index NodeFlagIndex) IsLeaf() bool {
	// Shifting out the 31 index bits leaves only the flag bit.
	return index>>31 == 1
}
// Index returns the node's index in the tree, i.e. the value with the
// leaf/branch flag in bit 31 cleared.
func (index NodeFlagIndex) Index() uint32 {
	return uint32(index &^ (1 << 31))
}

View File

@ -0,0 +1,36 @@
package internal
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestNodeID checks that NewNodeID round-trips the leaf flag, version and
// index, and that String renders them in the expected format.
func TestNodeID(t *testing.T) {
	type testCase struct {
		name    string
		leaf    bool
		version uint32
		index   uint32
		str     string
	}
	cases := []testCase{
		{name: "leaf1_1", leaf: true, version: 1, index: 1, str: "NodeID{leaf:true, version:1, index:1}"},
		{name: "branch2_3", version: 2, index: 3, str: "NodeID{leaf:false, version:2, index:3}"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			id := NewNodeID(tc.leaf, tc.version, tc.index)
			require.Equal(t, tc.leaf, id.IsLeaf())
			require.Equal(t, tc.index, id.FlagIndex.Index())
			require.Equal(t, tc.version, id.Version)
			require.Equal(t, tc.str, id.String())
		})
	}
}

30
iavl/internal/uint40.go Normal file
View File

@ -0,0 +1,30 @@
package internal
import "fmt"
// Uint40 is a 40-bit unsigned integer stored in 5 bytes with little-endian encoding:
// element 0 holds the least significant byte and element 4 the most significant.
type Uint40 [5]byte
// NewUint40 encodes v as a little-endian Uint40.
// It panics if v does not fit in 40 bits.
func NewUint40(v uint64) Uint40 {
	const maxUint40 = 1<<40 - 1
	if v > maxUint40 {
		panic(fmt.Sprintf("value %d overflows Uint40", v))
	}
	var out Uint40
	for i := 0; i < len(out); i++ {
		// Byte i carries bits [8*i, 8*i+8) of v.
		out[i] = byte(v >> (8 * uint(i)))
	}
	return out
}
// ToUint64 decodes the little-endian bytes of u into a uint64.
func (u Uint40) ToUint64() uint64 {
	var v uint64
	// Fold from the most significant byte down to the least significant one.
	for i := len(u) - 1; i >= 0; i-- {
		v = v<<8 | uint64(u[i])
	}
	return v
}
// String implements fmt.Stringer by formatting the decoded value in decimal.
func (u Uint40) String() string {
	value := u.ToUint64()
	return fmt.Sprintf("%d", value)
}

View File

@ -0,0 +1,50 @@
package internal
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestUint40 checks that in-range values round-trip through NewUint40,
// ToUint64 and String, and that a value needing more than 40 bits panics.
func TestUint40(t *testing.T) {
	type testCase struct {
		name        string
		value       uint64
		expectPanic bool
		str         string
	}
	cases := []testCase{
		{name: "zero", str: "0"},
		{name: "max", value: 1<<40 - 1, str: "1099511627775"},
		{name: "arbitrary", value: 109951162777, str: "109951162777"},
		{name: "overflow", value: 1 << 40, expectPanic: true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.expectPanic {
				require.Panics(t, func() {
					_ = NewUint40(tc.value)
				})
				return
			}
			u := NewUint40(tc.value)
			got := u.ToUint64()
			require.Equal(t, tc.value, got)
			require.Equal(t, tc.str, u.String())
		})
	}
}