From b27c861485695d3f5bb92bcb281abc95f4d90fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 2 Apr 2024 18:21:21 +0200 Subject: [PATCH] curio: Cleanup proving config (#11751) --- curiosrc/builder.go | 2 +- documentation/en/default-curio-config.toml | 19 ---- node/config/def.go | 2 +- node/config/doc_gen.go | 105 ++++++++++++++++++++- node/config/types.go | 86 ++++++++++++++++- 5 files changed, 191 insertions(+), 23 deletions(-) diff --git a/curiosrc/builder.go b/curiosrc/builder.go index 3a3294a29..3cd4bd0cd 100644 --- a/curiosrc/builder.go +++ b/curiosrc/builder.go @@ -19,7 +19,7 @@ import ( //var log = logging.Logger("provider") -func WindowPostScheduler(ctx context.Context, fc config.CurioFees, pc config.ProvingConfig, +func WindowPostScheduler(ctx context.Context, fc config.CurioFees, pc config.CurioProvingConfig, api api.FullNode, verif storiface.Verifier, lw *sealer.LocalWorker, sender *message.Sender, chainSched *chainsched.CurioChainSched, as *multictladdr.MultiAddressSelector, addresses map[dtypes.MinerAddress]bool, db *harmonydb.DB, stor paths.Store, idx paths.SectorIndex, max int) (*window.WdPostTask, *window.WdPostSubmitTask, *window.WdPostRecoverDeclareTask, error) { diff --git a/documentation/en/default-curio-config.toml b/documentation/en/default-curio-config.toml index 83a7c243d..11c073696 100644 --- a/documentation/en/default-curio-config.toml +++ b/documentation/en/default-curio-config.toml @@ -235,25 +235,6 @@ # type: Duration #PartitionCheckTimeout = "20m0s" - # Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present. - # - # WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need - # to be recovered. Before enabling this option, make sure your PoSt workers work correctly. 
- # - # After changing this option, confirm that the new value works in your setup by invoking - # 'lotus-miner proving compute window-post 0' - # - # type: bool - #DisableBuiltinWindowPoSt = false - - # Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present. - # - # WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. - # Before enabling this option, make sure your PoSt workers work correctly. - # - # type: bool - #DisableBuiltinWinningPoSt = false - # Disable WindowPoSt provable sector readability checks. # # In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges diff --git a/node/config/def.go b/node/config/def.go index f725f60d3..661bed56f 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -356,7 +356,7 @@ func DefaultCurioConfig() *CurioConfig { TerminateControl: []string{}, MinerAddresses: []string{}, }}, - Proving: ProvingConfig{ + Proving: CurioProvingConfig{ ParallelCheckLimit: 32, PartitionCheckTimeout: Duration(20 * time.Minute), SingleCheckTimeout: Duration(10 * time.Minute), diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index b1b91d9c9..b61a136ce 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -228,7 +228,7 @@ over the worker address if this flag is set.`, }, { Name: "Proving", - Type: "ProvingConfig", + Type: "CurioProvingConfig", Comment: ``, }, @@ -295,6 +295,109 @@ over the worker address if this flag is set.`, Comment: ``, }, }, + "CurioProvingConfig": { + { + Name: "ParallelCheckLimit", + Type: "int", + + Comment: `Maximum number of sector checks to run in parallel. (0 = unlimited) + +WARNING: Setting this value too high may make the node crash by running out of stack +WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due +to late submission. 
+ +After changing this option, confirm that the new value works in your setup by invoking +'lotus-miner proving compute window-post 0'`, + }, + { + Name: "SingleCheckTimeout", + Type: "Duration", + + Comment: `Maximum amount of time a proving pre-check can take for a sector. If the check times out the sector will be skipped + +WARNING: Setting this value too low risks sectors being skipped even though they are accessible, just reading the +test challenge took longer than this timeout +WARNING: Setting this value too high risks missing PoSt deadline in case IO operations related to this sector are +blocked (e.g. in case of disconnected NFS mount)`, + }, + { + Name: "PartitionCheckTimeout", + Type: "Duration", + + Comment: `Maximum amount of time a proving pre-check can take for an entire partition. If the check times out, sectors in +the partition which didn't get checked on time will be skipped + +WARNING: Setting this value too low risks sectors being skipped even though they are accessible, just reading the +test challenge took longer than this timeout +WARNING: Setting this value too high risks missing PoSt deadline in case IO operations related to this partition are +blocked or slow`, + }, + { + Name: "DisableWDPoStPreChecks", + Type: "bool", + + Comment: `Disable WindowPoSt provable sector readability checks. + +In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges +from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as +we're only interested in checking that sector data can be read. + +When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process +can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by +the builtin logic not skipping snark computation when some sectors need to be skipped. 
+ +When using PoSt workers, this process is mostly redundant: with PoSt workers, challenges will be read once, and +if challenges for some sectors aren't readable, those sectors will just get skipped. + +Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter +time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should +be negligible. + +NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers. + +NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is +sent to the chain + +After changing this option, confirm that the new value works in your setup by invoking +'lotus-miner proving compute window-post 0'`, + }, + { + Name: "MaxPartitionsPerPoStMessage", + Type: "int", + + Comment: `Maximum number of partitions to prove in a single SubmitWindowPoSt message. 0 = network limit (3 in nv21) + +A single partition may contain up to 2349 32GiB sectors, or 2300 64GiB sectors. + +Note that setting this value lower may result in less efficient gas use - more messages will be sent, +to prove each deadline, resulting in more total gas use (but each message will have lower gas limit) + +Setting this value above the network limit has no effect`, + }, + { + Name: "MaxPartitionsPerRecoveryMessage", + Type: "int", + + Comment: `In some cases when submitting DeclareFaultsRecovered messages, +there may be too many recoveries to fit in a BlockGasLimit. 
+In those cases it may be necessary to set this value to something low (eg 1); +Note that setting this value lower may result in less efficient gas use - more messages will be sent than needed, +resulting in more total gas use (but each message will have lower gas limit)`, + }, + { + Name: "SingleRecoveringPartitionPerPostMessage", + Type: "bool", + + Comment: `Enable single partition per PoSt Message for partitions containing recovery sectors + +In cases when submitting PoSt messages which contain recovering sectors, the default network limit may still be +too high to fit in the block gas limit. In those cases, it becomes useful to only house the single partition +with recovering sectors in the post message + +Note that setting this value lower may result in less efficient gas use - more messages will be sent, +to prove each deadline, resulting in more total gas use (but each message will have lower gas limit)`, + }, + }, "CurioSubsystemsConfig": { { Name: "EnableWindowPost", diff --git a/node/config/types.go b/node/config/types.go index 3872bffde..09e8d7800 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -73,7 +73,7 @@ type CurioConfig struct { // Addresses of wallets per MinerAddress (one of the fields). Addresses []CurioAddresses - Proving ProvingConfig + Proving CurioProvingConfig Journal JournalConfig Apis ApisConfig } @@ -723,6 +723,90 @@ type CurioAddresses struct { MinerAddresses []string } +type CurioProvingConfig struct { + // Maximum number of sector checks to run in parallel. (0 = unlimited) + // + // WARNING: Setting this value too high may make the node crash by running out of stack + // WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due + // to late submission. 
+ // + // After changing this option, confirm that the new value works in your setup by invoking + // 'lotus-miner proving compute window-post 0' + ParallelCheckLimit int + + // Maximum amount of time a proving pre-check can take for a sector. If the check times out the sector will be skipped + // + // WARNING: Setting this value too low risks sectors being skipped even though they are accessible, just reading the + // test challenge took longer than this timeout + // WARNING: Setting this value too high risks missing PoSt deadline in case IO operations related to this sector are + // blocked (e.g. in case of disconnected NFS mount) + SingleCheckTimeout Duration + + // Maximum amount of time a proving pre-check can take for an entire partition. If the check times out, sectors in + // the partition which didn't get checked on time will be skipped + // + // WARNING: Setting this value too low risks sectors being skipped even though they are accessible, just reading the + // test challenge took longer than this timeout + // WARNING: Setting this value too high risks missing PoSt deadline in case IO operations related to this partition are + // blocked or slow + PartitionCheckTimeout Duration + + // Disable WindowPoSt provable sector readability checks. + // + // In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges + // from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as + // we're only interested in checking that sector data can be read. + // + // When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process + // can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by + // the builtin logic not skipping snark computation when some sectors need to be skipped. 
+ // + // When using PoSt workers, this process is mostly redundant: with PoSt workers, challenges will be read once, and + // if challenges for some sectors aren't readable, those sectors will just get skipped. + // + // Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter + // time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should + // be negligible. + // + // NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers. + // + // NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is + // sent to the chain + // + // After changing this option, confirm that the new value works in your setup by invoking + // 'lotus-miner proving compute window-post 0' + DisableWDPoStPreChecks bool + + // Maximum number of partitions to prove in a single SubmitWindowPoSt message. 0 = network limit (3 in nv21) + // + // A single partition may contain up to 2349 32GiB sectors, or 2300 64GiB sectors. + // + // Note that setting this value lower may result in less efficient gas use - more messages will be sent, + // to prove each deadline, resulting in more total gas use (but each message will have lower gas limit) + // + // Setting this value above the network limit has no effect + MaxPartitionsPerPoStMessage int + + // Maximum number of partitions to declare in a single DeclareFaultsRecovered message. 0 = no limit. + + // In some cases when submitting DeclareFaultsRecovered messages, + // there may be too many recoveries to fit in a BlockGasLimit. 
+ // In those cases it may be necessary to set this value to something low (eg 1); + // Note that setting this value lower may result in less efficient gas use - more messages will be sent than needed, + // resulting in more total gas use (but each message will have lower gas limit) + MaxPartitionsPerRecoveryMessage int + + // Enable single partition per PoSt Message for partitions containing recovery sectors + // + // In cases when submitting PoSt messages which contain recovering sectors, the default network limit may still be + // too high to fit in the block gas limit. In those cases, it becomes useful to only house the single partition + // with recovering sectors in the post message + // + // Note that setting this value lower may result in less efficient gas use - more messages will be sent, + // to prove each deadline, resulting in more total gas use (but each message will have lower gas limit) + SingleRecoveringPartitionPerPostMessage bool +} + // API contains configs for API endpoint type API struct { // Binding address for the Lotus API