From 65f680946ccb19b3311bf3da00b1b2040bd0f69a Mon Sep 17 00:00:00 2001 From: Aaron Craelius Date: Wed, 3 Dec 2025 11:21:22 -0500 Subject: [PATCH] feat(iavl): initialize disk layout (#25624) --- iavl/internal/branch_layout.go | 64 ++++++++++++++++++++++++++++++++++ iavl/internal/leaf_layout.go | 36 +++++++++++++++++++ iavl/internal/node_id.go | 62 ++++++++++++++++++++++++++++++++ iavl/internal/node_id_test.go | 36 +++++++++++++++++++ iavl/internal/uint40.go | 30 ++++++++++++++++ iavl/internal/uint40_test.go | 50 ++++++++++++++++++++++++++ 6 files changed, 278 insertions(+) create mode 100644 iavl/internal/branch_layout.go create mode 100644 iavl/internal/leaf_layout.go create mode 100644 iavl/internal/node_id.go create mode 100644 iavl/internal/node_id_test.go create mode 100644 iavl/internal/uint40.go create mode 100644 iavl/internal/uint40_test.go diff --git a/iavl/internal/branch_layout.go b/iavl/internal/branch_layout.go new file mode 100644 index 0000000000..7ed33d4403 --- /dev/null +++ b/iavl/internal/branch_layout.go @@ -0,0 +1,64 @@ +package internal + +import ( + "fmt" + "unsafe" +) + +const ( + sizeBranch = 76 +) + +func init() { + // Verify the size of BranchLayout is what we expect it to be at runtime. + if unsafe.Sizeof(BranchLayout{}) != sizeBranch { + panic(fmt.Sprintf("invalid BranchLayout size: got %d, want %d", unsafe.Sizeof(BranchLayout{}), sizeBranch)) + } +} + +// BranchLayout is the on-disk layout of a branch node. +// NOTE: changes to this struct will affect on-disk compatibility. +type BranchLayout struct { + // ID is the NodeID of this branch node. + ID NodeID + + // Left is the NodeID of the left child node. + Left NodeID + + // Right is the NodeID of the right child node. + Right NodeID + + // NOTE: Left and right offsets are included for performance and take up an extra 8 bytes of storage for each branch node. 
+ // In an alternate design we stored only NodeID or offset for left and right depending on whether they are local + // to this changeset or in a different changeset. + // This saved 8 bytes of storage per branch node but made the implementation significantly more complex. + // For now, we are including both the left and right IDs and offsets, but if storage space becomes a problem + // we can revisit the earlier design and have an 8-byte NodeIDOrOffset type for Left and Right. + + // LeftOffset is the 1-based offset of the left child node if it is in this changeset, 0 otherwise. + // The Left NodeID will indicate whether this is a branch or leaf node. + LeftOffset uint32 + + // RightOffset is the 1-based offset of the right child node if it is in this changeset, 0 otherwise. + // The Right NodeID will indicate whether this is a branch or leaf node. + RightOffset uint32 + + // KeyOffset is the offset of the key data for this node in the key value data file. + // NOTE: a 32-bit offset means that the key data file can be at most 4GB in size. + // This doesn't limit the size of the overall tree; it just limits the size of individual key/value data files. + // If we want to support larger key/value data files in the future, we can change this to a 40-bit offset, + // and a spare byte of padding is already available in this struct for this purpose (see the padding note below). + KeyOffset uint32 + + // Height is the height of this branch node in the tree. + Height uint8 + + // NOTE: the fields of this struct add up to 74 bytes and the struct is padded to 76, so there are two bytes of padding that could be used for something else in the future if needed + // such as an extra byte to allow for 40-bit key offsets. Because Uint40 and Hash are byte-aligned, the Go compiler places this padding after Hash, not at this position. + + // Size is the number of leaf nodes in the subtree rooted at this branch node. + Size Uint40 + + // Hash is the hash of this branch node. 
+ Hash [32]byte +} diff --git a/iavl/internal/leaf_layout.go b/iavl/internal/leaf_layout.go new file mode 100644 index 0000000000..95e67ca3a9 --- /dev/null +++ b/iavl/internal/leaf_layout.go @@ -0,0 +1,36 @@ +package internal + +import ( + "fmt" + "unsafe" +) + +const ( + sizeLeaf = 44 +) + +func init() { + // Verify the size of LeafLayout is what we expect it to be at runtime. + if unsafe.Sizeof(LeafLayout{}) != sizeLeaf { + panic(fmt.Sprintf("invalid LeafLayout size: got %d, want %d", unsafe.Sizeof(LeafLayout{}), sizeLeaf)) + } +} + +// LeafLayout is the on-disk layout of a leaf node. +// NOTE: changes to this struct will affect on-disk compatibility. +type LeafLayout struct { + // ID is the NodeID of this leaf node. + ID NodeID + + // KeyOffset is the offset of the key data for this node in the key value data file. + // NOTE: a 32-bit offset means that the key data file can be at most 4GB in size. + // If we want to support larger key/value data files in the future, we can change this to a 40-bit offset. + // However, this would require changing the size of this struct from 44 bytes to 48 bytes, which would break + // on-disk compatibility. + // Such an upgrade could be made by introducing a "wide changeset" format that lives alongside + // this existing "compact" format. + KeyOffset uint32 + + // Hash is the hash of this leaf node. + Hash [32]byte +} diff --git a/iavl/internal/node_id.go b/iavl/internal/node_id.go new file mode 100644 index 0000000000..7eff3ae501 --- /dev/null +++ b/iavl/internal/node_id.go @@ -0,0 +1,62 @@ +package internal + +import "fmt" + +// NodeID is a stable identifier for a node in the IAVL tree. +// A NodeID allows for a 32-bit version and a 31-bit index within that version, +// with 1 bit used to indicate whether the node is a leaf or branch. +// A 32-bit version should allow for 136 years of 1-second blocks. +// If block production significantly speeds up, we can increase the width of the version field in the future. 
+// This sort of change can be done without any major on-disk migration because we can simply create a "wide changeset" +// format that lives alongside the existing "compact" format. +// Because the cost of migration is low, we have decided to keep things simple and compact for now. +type NodeID struct { + // Version is the version of the tree at which this node was created. + Version uint32 + + // FlagIndex indicates whether this is a branch or leaf node and stores its index in the tree. + FlagIndex NodeFlagIndex +} + +// NodeFlagIndex is the index of an IAVL node in the tree plus a flag indicating whether this is a branch or leaf node. +// For leaf nodes, the index value is the 1-based in-order index of the leaf node with reference to other leaf nodes in this version. +// For branch nodes, the index value is the 1-based post-order traversal index of the node within this version. +// Bit 31 indicates whether this is a branch or leaf node (0 for branch, 1 for leaf). +type NodeFlagIndex uint32 + +// NewNodeID creates a new NodeID. +func NewNodeID(isLeaf bool, version, index uint32) NodeID { + return NodeID{ + Version: version, + FlagIndex: NewNodeFlagIndex(isLeaf, index), + } +} + +// IsLeaf returns true if the node is a leaf node. +func (id NodeID) IsLeaf() bool { + return id.FlagIndex.IsLeaf() +} + +// String returns a string representation of the NodeID. +func (id NodeID) String() string { + return fmt.Sprintf("NodeID{leaf:%t, version:%d, index:%d}", id.IsLeaf(), id.Version, id.FlagIndex.Index()) +} + +// NewNodeFlagIndex creates a new NodeFlagIndex. +func NewNodeFlagIndex(isLeaf bool, index uint32) NodeFlagIndex { + idx := NodeFlagIndex(index) + if isLeaf { + idx |= 1 << 31 + } + return idx +} + +// IsLeaf returns true if the node is a leaf node. +func (index NodeFlagIndex) IsLeaf() bool { + return index&(1<<31) != 0 +} + +// Index returns the index of the node in the tree. 
+func (index NodeFlagIndex) Index() uint32 { + return uint32(index) & 0x7FFFFFFF +} diff --git a/iavl/internal/node_id_test.go b/iavl/internal/node_id_test.go new file mode 100644 index 0000000000..a99f44c37d --- /dev/null +++ b/iavl/internal/node_id_test.go @@ -0,0 +1,36 @@ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNodeID(t *testing.T) { + tests := []struct { + name string + leaf bool + version uint32 + index uint32 + str string + }{ + { + name: "leaf1_1", + leaf: true, version: 1, index: 1, + str: "NodeID{leaf:true, version:1, index:1}", + }, + { + name: "branch2_3", version: 2, index: 3, + str: "NodeID{leaf:false, version:2, index:3}", + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + id := NewNodeID(test.leaf, test.version, test.index) + require.Equal(t, test.leaf, id.IsLeaf()) + require.Equal(t, test.index, id.FlagIndex.Index()) + require.Equal(t, test.version, id.Version) + require.Equal(t, test.str, id.String()) + }) + } +} diff --git a/iavl/internal/uint40.go b/iavl/internal/uint40.go new file mode 100644 index 0000000000..d7b9b24f80 --- /dev/null +++ b/iavl/internal/uint40.go @@ -0,0 +1,30 @@ +package internal + +import "fmt" + +// Uint40 is a 40-bit unsigned integer stored in 5 bytes with little-endian encoding. +type Uint40 [5]byte + +// NewUint40 creates a new Uint40 from a uint64. +func NewUint40(v uint64) Uint40 { + if v>>40 != 0 { + panic(fmt.Sprintf("value %d overflows Uint40", v)) + } + var u Uint40 + u[0] = byte(v) + u[1] = byte(v >> 8) + u[2] = byte(v >> 16) + u[3] = byte(v >> 24) + u[4] = byte(v >> 32) + return u +} + +// ToUint64 converts the Uint40 to a uint64. +func (u Uint40) ToUint64() uint64 { + return uint64(u[0]) | uint64(u[1])<<8 | uint64(u[2])<<16 | uint64(u[3])<<24 | uint64(u[4])<<32 +} + +// String implements fmt.Stringer. 
+func (u Uint40) String() string { + return fmt.Sprintf("%d", u.ToUint64()) +} diff --git a/iavl/internal/uint40_test.go b/iavl/internal/uint40_test.go new file mode 100644 index 0000000000..0a41a54e91 --- /dev/null +++ b/iavl/internal/uint40_test.go @@ -0,0 +1,50 @@ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUint40(t *testing.T) { + tests := []struct { + name string + value uint64 + expectPanic bool + str string + }{ + { + name: "zero", + str: "0", + }, + { + name: "max", + value: 1<<40 - 1, + str: "1099511627775", + }, + { + name: "arbitrary", + value: 109951162777, + str: "109951162777", + }, + { + name: "overflow", + value: 1 << 40, + expectPanic: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.expectPanic { + require.Panics(t, func() { + _ = NewUint40(tt.value) + }) + } else { + u := NewUint40(tt.value) + got := u.ToUint64() + require.Equal(t, tt.value, got) + require.Equal(t, tt.str, u.String()) + } + }) + } +}