Always use a separate database for blobs (#4892)

* Always use a separate blobs DB

* Add + update tests
This commit is contained in:
Michael Sproul 2023-11-09 16:51:36 +11:00 committed by GitHub
parent 1b8c0ed987
commit 051c3e842f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 89 additions and 53 deletions

View File

@ -771,12 +771,13 @@ mod test {
) -> Arc<HotColdDB<E, LevelDB<E>, LevelDB<E>>> {
let hot_path = db_path.path().join("hot_db");
let cold_path = db_path.path().join("cold_db");
let blobs_path = db_path.path().join("blobs_db");
let config = StoreConfig::default();
HotColdDB::open(
&hot_path,
&cold_path,
None,
&blobs_path,
|_, _, _| Ok(()),
config,
spec,

View File

@ -29,12 +29,13 @@ fn get_store(db_path: &TempDir) -> Arc<HotColdDB> {
let spec = test_spec::<E>();
let hot_path = db_path.path().join("hot_db");
let cold_path = db_path.path().join("cold_db");
let blobs_path = db_path.path().join("blobs_db");
let config = StoreConfig::default();
let log = NullLoggerBuilder.build().expect("logger should build");
HotColdDB::open(
&hot_path,
&cold_path,
None,
&blobs_path,
|_, _, _| Ok(()),
config,
spec,

View File

@ -31,7 +31,7 @@ use store::{
chunked_vector::{chunk_key, Field},
get_key_for_col,
iter::{BlockRootsIterator, StateRootsIterator},
DBColumn, HotColdDB, KeyValueStore, KeyValueStoreOp, LevelDB, StoreConfig,
BlobInfo, DBColumn, HotColdDB, KeyValueStore, KeyValueStoreOp, LevelDB, StoreConfig,
};
use tempfile::{tempdir, TempDir};
use tokio::time::sleep;
@ -62,12 +62,13 @@ fn get_store_generic(
) -> Arc<HotColdDB<E, LevelDB<E>, LevelDB<E>>> {
let hot_path = db_path.path().join("hot_db");
let cold_path = db_path.path().join("cold_db");
let blobs_path = db_path.path().join("blobs_db");
let log = test_logger();
HotColdDB::open(
&hot_path,
&cold_path,
None,
&blobs_path,
|_, _, _| Ok(()),
config,
spec,
@ -3278,6 +3279,40 @@ async fn deneb_prune_blobs_margin_test(margin: u64) {
check_blob_existence(&harness, oldest_blob_slot, harness.head_slot(), true);
}
/// Check that a database with `blobs_db=false` can be upgraded to `blobs_db=true` before Deneb.
#[tokio::test]
async fn change_to_separate_blobs_db_before_deneb() {
let db_path = tempdir().unwrap();
let store = get_store(&db_path);
// Only run this test on forks prior to Deneb. If the blobs database already has blobs, we can't
// move it.
if store.get_chain_spec().deneb_fork_epoch.is_some() {
return;
}
let init_blob_info = store.get_blob_info();
assert!(
init_blob_info.blobs_db,
"separate blobs DB should be the default"
);
// Change to `blobs_db=false` to emulate legacy Deneb DB.
let legacy_blob_info = BlobInfo {
blobs_db: false,
..init_blob_info
};
store
.compare_and_set_blob_info_with_write(init_blob_info.clone(), legacy_blob_info.clone())
.unwrap();
assert_eq!(store.get_blob_info(), legacy_blob_info);
// Re-open the DB and check that `blobs_db` gets changed back to true.
drop(store);
let store = get_store(&db_path);
assert_eq!(store.get_blob_info(), init_blob_info);
}
/// Check that there are blob sidecars (or not) at every slot in the range.
fn check_blob_existence(
harness: &TestHarness,

View File

@ -901,7 +901,7 @@ where
mut self,
hot_path: &Path,
cold_path: &Path,
blobs_path: Option<PathBuf>,
blobs_path: &Path,
config: StoreConfig,
log: Logger,
) -> Result<Self, String> {

View File

@ -10,8 +10,11 @@ use std::fs;
use std::path::PathBuf;
use std::time::Duration;
use types::Graffiti;
/// Default directory name for the freezer database under the top-level data dir.
const DEFAULT_FREEZER_DB_DIR: &str = "freezer_db";
/// Default directory name for the blobs database under the top-level data dir.
const DEFAULT_BLOBS_DB_DIR: &str = "blobs_db";
/// Defines how the client should initialize the `BeaconChain` and other components.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -146,12 +149,19 @@ impl Config {
.unwrap_or_else(|| self.default_freezer_db_path())
}
/// Fetch default path to use for the blobs database.
fn default_blobs_db_path(&self) -> PathBuf {
self.get_data_dir().join(DEFAULT_BLOBS_DB_DIR)
}
/// Returns the path to which the client may initialize the on-disk blobs database.
///
/// Will attempt to use the user-supplied path from e.g. the CLI, or will default
/// to None.
pub fn get_blobs_db_path(&self) -> Option<PathBuf> {
self.blobs_db_path.clone()
pub fn get_blobs_db_path(&self) -> PathBuf {
self.blobs_db_path
.clone()
.unwrap_or_else(|| self.default_blobs_db_path())
}
/// Get the freezer DB path, creating it if necessary.
@ -160,11 +170,8 @@ impl Config {
}
/// Get the blobs DB path, creating it if necessary.
pub fn create_blobs_db_path(&self) -> Result<Option<PathBuf>, String> {
match self.get_blobs_db_path() {
Some(blobs_db_path) => Ok(Some(ensure_dir_exists(blobs_db_path)?)),
None => Ok(None),
}
pub fn create_blobs_db_path(&self) -> Result<PathBuf, String> {
ensure_dir_exists(self.get_blobs_db_path())
}
/// Returns the "modern" path to the data_dir.

View File

@ -89,7 +89,7 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
.disk_store(
&db_path,
&freezer_db_path,
blobs_db_path,
&blobs_db_path,
store_config,
log.clone(),
)?;

View File

@ -35,7 +35,7 @@ use state_processing::{
use std::cmp::min;
use std::convert::TryInto;
use std::marker::PhantomData;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use types::blob_sidecar::BlobSidecarList;
@ -61,7 +61,7 @@ pub struct HotColdDB<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
/// Cold database containing compact historical data.
pub cold_db: Cold,
/// Database containing blobs. If None, store falls back to use `cold_db`.
pub blobs_db: Option<Cold>,
pub blobs_db: Cold,
/// Hot database containing duplicated but quick-to-access recent data.
///
/// The hot database also contains all blocks.
@ -138,7 +138,6 @@ pub enum HotColdDBError {
MissingExecutionPayload(Hash256),
MissingFullBlockExecutionPayloadPruned(Hash256, Slot),
MissingAnchorInfo,
MissingPathToBlobsDatabase,
BlobsPreviouslyInDefaultStore,
HotStateSummaryError(BeaconStateError),
RestorePointDecodeError(ssz::DecodeError),
@ -178,7 +177,7 @@ impl<E: EthSpec> HotColdDB<E, MemoryStore<E>, MemoryStore<E>> {
anchor_info: RwLock::new(None),
blob_info: RwLock::new(BlobInfo::default()),
cold_db: MemoryStore::open(),
blobs_db: Some(MemoryStore::open()),
blobs_db: MemoryStore::open(),
hot_db: MemoryStore::open(),
block_cache: Mutex::new(BlockCache::new(config.block_cache_size)),
state_cache: Mutex::new(LruCache::new(config.historic_state_cache_size)),
@ -202,7 +201,7 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
pub fn open(
hot_path: &Path,
cold_path: &Path,
blobs_db_path: Option<PathBuf>,
blobs_db_path: &Path,
migrate_schema: impl FnOnce(Arc<Self>, SchemaVersion, SchemaVersion) -> Result<(), Error>,
config: StoreConfig,
spec: ChainSpec,
@ -215,7 +214,7 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
anchor_info: RwLock::new(None),
blob_info: RwLock::new(BlobInfo::default()),
cold_db: LevelDB::open(cold_path)?,
blobs_db: None,
blobs_db: LevelDB::open(blobs_db_path)?,
hot_db: LevelDB::open(hot_path)?,
block_cache: Mutex::new(BlockCache::new(config.block_cache_size)),
state_cache: Mutex::new(LruCache::new(config.historic_state_cache_size)),
@ -271,37 +270,29 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
Some(blob_info) => {
// If the oldest block slot is already set do not allow the blob DB path to be
// changed (require manual migration).
if blob_info.oldest_blob_slot.is_some() {
if blobs_db_path.is_some() && !blob_info.blobs_db {
return Err(HotColdDBError::BlobsPreviouslyInDefaultStore.into());
} else if blobs_db_path.is_none() && blob_info.blobs_db {
return Err(HotColdDBError::MissingPathToBlobsDatabase.into());
}
if blob_info.oldest_blob_slot.is_some() && !blob_info.blobs_db {
return Err(HotColdDBError::BlobsPreviouslyInDefaultStore.into());
}
// Set the oldest blob slot to the Deneb fork slot if it is not yet set.
// Always initialize `blobs_db` to true, we no longer support storing the blobs
// in the freezer DB, because the UX is strictly worse for relocating the DB.
let oldest_blob_slot = blob_info.oldest_blob_slot.or(deneb_fork_slot);
BlobInfo {
oldest_blob_slot,
blobs_db: blobs_db_path.is_some(),
blobs_db: true,
}
}
// First start.
None => BlobInfo {
// Set the oldest blob slot to the Deneb fork slot if it is not yet set.
oldest_blob_slot: deneb_fork_slot,
blobs_db: blobs_db_path.is_some(),
blobs_db: true,
},
};
if new_blob_info.blobs_db {
if let Some(path) = &blobs_db_path {
db.blobs_db = Some(LevelDB::open(path.as_path())?);
}
}
db.compare_and_set_blob_info_with_write(<_>::default(), new_blob_info.clone())?;
info!(
db.log,
"Blob DB initialized";
"separate_db" => new_blob_info.blobs_db,
"path" => ?blobs_db_path,
"oldest_blob_slot" => ?new_blob_info.oldest_blob_slot,
);
@ -575,8 +566,8 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
/// Check if the blobs for a block exists on disk.
pub fn blobs_exist(&self, block_root: &Hash256) -> Result<bool, Error> {
let blobs_db = self.blobs_db.as_ref().unwrap_or(&self.cold_db);
blobs_db.key_exists(DBColumn::BeaconBlob.into(), block_root.as_bytes())
self.blobs_db
.key_exists(DBColumn::BeaconBlob.into(), block_root.as_bytes())
}
/// Determine whether a block exists in the database.
@ -592,13 +583,12 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
.key_delete(DBColumn::BeaconBlock.into(), block_root.as_bytes())?;
self.hot_db
.key_delete(DBColumn::ExecPayload.into(), block_root.as_bytes())?;
let blobs_db = self.blobs_db.as_ref().unwrap_or(&self.cold_db);
blobs_db.key_delete(DBColumn::BeaconBlob.into(), block_root.as_bytes())
self.blobs_db
.key_delete(DBColumn::BeaconBlob.into(), block_root.as_bytes())
}
pub fn put_blobs(&self, block_root: &Hash256, blobs: BlobSidecarList<E>) -> Result<(), Error> {
let blobs_db = self.blobs_db.as_ref().unwrap_or(&self.cold_db);
blobs_db.put_bytes(
self.blobs_db.put_bytes(
DBColumn::BeaconBlob.into(),
block_root.as_bytes(),
&blobs.as_ssz_bytes(),
@ -988,9 +978,9 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
let mut guard = self.block_cache.lock();
let blob_cache_ops = blobs_ops.clone();
let blobs_db = self.blobs_db.as_ref().unwrap_or(&self.cold_db);
// Try to execute blobs store ops.
blobs_db.do_atomically(self.convert_to_kv_batch(blobs_ops)?)?;
self.blobs_db
.do_atomically(self.convert_to_kv_batch(blobs_ops)?)?;
let hot_db_cache_ops = hot_db_ops.clone();
// Try to execute hot db store ops.
@ -1018,7 +1008,8 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
};
*op = reverse_op;
}
blobs_db.do_atomically(self.convert_to_kv_batch(blob_cache_ops)?)?;
self.blobs_db
.do_atomically(self.convert_to_kv_batch(blob_cache_ops)?)?;
return Err(e);
}
@ -1436,15 +1427,16 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
/// Fetch blobs for a given block from the store.
pub fn get_blobs(&self, block_root: &Hash256) -> Result<Option<BlobSidecarList<E>>, Error> {
let blobs_db = self.blobs_db.as_ref().unwrap_or(&self.cold_db);
// Check the cache.
if let Some(blobs) = self.block_cache.lock().get_blobs(block_root) {
metrics::inc_counter(&metrics::BEACON_BLOBS_CACHE_HIT_COUNT);
return Ok(Some(blobs.clone()));
}
match blobs_db.get_bytes(DBColumn::BeaconBlob.into(), block_root.as_bytes())? {
match self
.blobs_db
.get_bytes(DBColumn::BeaconBlob.into(), block_root.as_bytes())?
{
Some(ref blobs_bytes) => {
let blobs = BlobSidecarList::from_ssz_bytes(blobs_bytes)?;
self.block_cache
@ -1640,7 +1632,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
});
let blob_info = BlobInfo {
oldest_blob_slot,
blobs_db: self.blobs_db.is_some(),
blobs_db: true,
};
self.compare_and_set_blob_info(self.get_blob_info(), blob_info)
}

View File

@ -135,7 +135,7 @@ pub struct BlobInfo {
/// If the `oldest_blob_slot` is `None` then this means that the Deneb fork epoch is not yet
/// known.
pub oldest_blob_slot: Option<Slot>,
/// A separate blobs database is in use.
/// A separate blobs database is in use (deprecated, always `true`).
pub blobs_db: bool,
}

View File

@ -210,7 +210,7 @@ pub fn display_db_version<E: EthSpec>(
HotColdDB::<E, LevelDB<E>, LevelDB<E>>::open(
&hot_path,
&cold_path,
blobs_path,
&blobs_path,
|_, from, _| {
version = from;
Ok(())
@ -288,7 +288,7 @@ pub fn inspect_db<E: EthSpec>(
let db = HotColdDB::<E, LevelDB<E>, LevelDB<E>>::open(
&hot_path,
&cold_path,
blobs_path,
&blobs_path,
|_, _, _| Ok(()),
client_config.store,
spec,
@ -410,7 +410,7 @@ pub fn migrate_db<E: EthSpec>(
let db = HotColdDB::<E, LevelDB<E>, LevelDB<E>>::open(
&hot_path,
&cold_path,
blobs_path,
&blobs_path,
|_, db_initial_version, _| {
from = db_initial_version;
Ok(())
@ -450,7 +450,7 @@ pub fn prune_payloads<E: EthSpec>(
let db = HotColdDB::<E, LevelDB<E>, LevelDB<E>>::open(
&hot_path,
&cold_path,
blobs_path,
&blobs_path,
|_, _, _| Ok(()),
client_config.store,
spec.clone(),
@ -476,7 +476,7 @@ pub fn prune_blobs<E: EthSpec>(
let db = HotColdDB::<E, LevelDB<E>, LevelDB<E>>::open(
&hot_path,
&cold_path,
blobs_path,
&blobs_path,
|_, _, _| Ok(()),
client_config.store,
spec.clone(),
@ -512,7 +512,7 @@ pub fn prune_states<E: EthSpec>(
let db = HotColdDB::<E, LevelDB<E>, LevelDB<E>>::open(
&hot_path,
&cold_path,
blobs_path,
&blobs_path,
|_, _, _| Ok(()),
client_config.store,
spec.clone(),