diff --git a/.circleci/config.yml b/.circleci/config.yml index 20701f7d5..70e435d3b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ version: 2.1 orbs: - aws-cli: circleci/aws-cli@1.3.2 - docker: circleci/docker@2.1.4 + aws-cli: circleci/aws-cli@4.1.1 + docker: circleci/docker@2.3.0 executors: golang: @@ -70,8 +70,6 @@ commands: name: Restore parameters cache keys: - 'v26-2k-lotus-params' - paths: - - /var/tmp/filecoin-proof-parameters/ - run: ./lotus fetch-params 2048 - save_cache: name: Save parameters cache @@ -96,6 +94,7 @@ commands: git fetch --all install-ubuntu-deps: steps: + - run: sudo apt install curl ca-certificates gnupg - run: sudo apt-get update - run: sudo apt-get install ocl-icd-opencl-dev libhwloc-dev check-go-version: @@ -143,9 +142,9 @@ jobs: Run tests with gotestsum. working_directory: ~/lotus parameters: &test-params - executor: - type: executor - default: golang + resource_class: + type: string + default: medium+ go-test-flags: type: string default: "-timeout 20m" @@ -164,7 +163,14 @@ jobs: type: string default: unit description: Test suite name to report to CircleCI. - executor: << parameters.executor >> + docker: + - image: cimg/go:1.20 + environment: + LOTUS_HARMONYDB_HOSTS: yugabyte + - image: yugabytedb/yugabyte:2.18.0.0-b65 + command: bin/yugabyted start --daemon=false + name: yugabyte + resource_class: << parameters.resource_class >> steps: - install-ubuntu-deps - attach_workspace: @@ -182,6 +188,8 @@ jobs: command: | mkdir -p /tmp/test-reports/<< parameters.suite >> mkdir -p /tmp/test-artifacts + dockerize -wait tcp://yugabyte:5433 -timeout 3m + env gotestsum \ --format standard-verbose \ --junitfile /tmp/test-reports/<< parameters.suite >>/junit.xml \ @@ -209,7 +217,9 @@ jobs: Branch on github.com/filecoin-project/test-vectors to checkout and test with. If empty (the default) the commit defined by the git submodule is used. - executor: << parameters.executor >> + docker: + - image: cimg/go:1.20 + resource_class: << parameters.resource_class >> steps: - install-ubuntu-deps - attach_workspace: @@ -396,15 +406,14 @@ jobs: Run golangci-lint. working_directory: ~/lotus parameters: - executor: - type: executor - default: golang args: type: string default: '' description: | Arguments to pass to golangci-lint - executor: << parameters.executor >> + docker: + - image: cimg/go:1.20 + resource_class: medium+ steps: - install-ubuntu-deps - attach_workspace: @@ -575,7 +584,7 @@ workflows: - build suite: itest-deals_concurrent target: "./itests/deals_concurrent_test.go" - executor: golang-2xl + resource_class: 2xlarge - test: name: test-itest-deals_invalid_utf8_label requires: @@ -768,6 +777,18 @@ workflows: - build suite: itest-get_messages_in_ts target: "./itests/get_messages_in_ts_test.go" + - test: + name: test-itest-harmonydb + requires: + - build + suite: itest-harmonydb + target: "./itests/harmonydb_test.go" + - test: + name: test-itest-harmonytask + requires: + - build + suite: itest-harmonytask + target: "./itests/harmonytask_test.go" - test: name: test-itest-lite_migration requires: @@ -976,14 +997,14 @@ workflows: - build suite: itest-wdpost_worker_config target: "./itests/wdpost_worker_config_test.go" - executor: golang-2xl + resource_class: 2xlarge - test: name: test-itest-worker requires: - build suite: itest-worker target: "./itests/worker_test.go" - executor: golang-2xl + resource_class: 2xlarge - test: name: test-itest-worker_upgrade requires: @@ -996,7 +1017,7 @@ workflows: - build suite: utest-unit-cli target: "./cli/... 
./cmd/... ./api/..." - executor: golang-2xl + resource_class: 2xlarge get-params: true - test: name: test-unit-node @@ -1010,7 +1031,7 @@ workflows: - build suite: utest-unit-rest target: "./blockstore/... ./build/... ./chain/... ./conformance/... ./gateway/... ./journal/... ./lib/... ./markets/... ./paychmgr/... ./tools/..." - executor: golang-2xl + resource_class: 2xlarge - test: name: test-unit-storage requires: diff --git a/.circleci/gen.go b/.circleci/gen.go index 93f409df2..19329247a 100644 --- a/.circleci/gen.go +++ b/.circleci/gen.go @@ -10,11 +10,25 @@ import ( "text/template" ) +var GoVersion = "" // from init below. Ex: 1.19.7 + //go:generate go run ./gen.go .. //go:embed template.yml var templateFile embed.FS +func init() { + b, err := os.ReadFile("../go.mod") + if err != nil { + panic("cannot find go.mod in parent folder") + } + for _, line := range strings.Split(string(b), "\n") { + if strings.HasPrefix(line, "go ") { + GoVersion = line[3:] + } + } +} + type ( dirs = []string suite = string @@ -111,6 +125,7 @@ func main() { Networks []string ItestFiles []string UnitSuites map[string]string + GoVersion string } in := data{ Networks: []string{"mainnet", "butterflynet", "calibnet", "debug"}, @@ -125,6 +140,7 @@ func main() { } return ret }(), + GoVersion: GoVersion, } out, err := os.Create("./config.yml") diff --git a/.circleci/template.yml b/.circleci/template.yml index 0b244d013..9011f1a86 100644 --- a/.circleci/template.yml +++ b/.circleci/template.yml @@ -1,7 +1,7 @@ version: 2.1 orbs: - aws-cli: circleci/aws-cli@1.3.2 - docker: circleci/docker@2.1.4 + aws-cli: circleci/aws-cli@4.1.1 + docker: circleci/docker@2.3.0 executors: golang: @@ -70,8 +70,6 @@ commands: name: Restore parameters cache keys: - 'v26-2k-lotus-params' - paths: - - /var/tmp/filecoin-proof-parameters/ - run: ./lotus fetch-params 2048 - save_cache: name: Save parameters cache @@ -96,6 +94,7 @@ commands: git fetch --all install-ubuntu-deps: steps: + - run: sudo apt install curl ca-certificates gnupg - run: sudo apt-get update - run: sudo apt-get install ocl-icd-opencl-dev libhwloc-dev check-go-version: @@ -143,9 +142,9 @@ jobs: Run tests with gotestsum. working_directory: ~/lotus parameters: &test-params - executor: - type: executor - default: golang + resource_class: + type: string + default: medium+ go-test-flags: type: string default: "-timeout 20m" @@ -164,7 +163,14 @@ jobs: type: string default: unit description: Test suite name to report to CircleCI. - executor: << parameters.executor >> + docker: + - image: cimg/go:[[ .GoVersion]] + environment: + LOTUS_HARMONYDB_HOSTS: yugabyte + - image: yugabytedb/yugabyte:2.18.0.0-b65 + command: bin/yugabyted start --daemon=false + name: yugabyte + resource_class: << parameters.resource_class >> steps: - install-ubuntu-deps - attach_workspace: @@ -182,6 +188,8 @@ jobs: command: | mkdir -p /tmp/test-reports/<< parameters.suite >> mkdir -p /tmp/test-artifacts + dockerize -wait tcp://yugabyte:5433 -timeout 3m + env gotestsum \ --format standard-verbose \ --junitfile /tmp/test-reports/<< parameters.suite >>/junit.xml \ @@ -209,7 +217,9 @@ jobs: Branch on github.com/filecoin-project/test-vectors to checkout and test with. If empty (the default) the commit defined by the git submodule is used. - executor: << parameters.executor >> + docker: + - image: cimg/go:[[ .GoVersion]] + resource_class: << parameters.resource_class >> steps: - install-ubuntu-deps - attach_workspace: @@ -396,15 +406,14 @@ jobs: Run golangci-lint. 
working_directory: ~/lotus parameters: - executor: - type: executor - default: golang args: type: string default: '' description: | Arguments to pass to golangci-lint - executor: << parameters.executor >> + docker: + - image: cimg/go:[[ .GoVersion]] + resource_class: medium+ steps: - install-ubuntu-deps - attach_workspace: @@ -543,7 +552,7 @@ workflows: suite: itest-[[ $name ]] target: "./itests/[[ $file ]]" [[- if or (eq $name "worker") (eq $name "deals_concurrent") (eq $name "wdpost_worker_config")]] - executor: golang-2xl + resource_class: 2xlarge [[- end]] [[- if or (eq $name "wdpost") (eq $name "sector_pledge")]] get-params: true @@ -561,11 +570,11 @@ workflows: get-params: true [[- end -]] [[- if eq $suite "unit-cli"]] - executor: golang-2xl + resource_class: 2xlarge get-params: true [[- end -]] [[- if eq $suite "unit-rest"]] - executor: golang-2xl + resource_class: 2xlarge [[- end -]] [[- end]] - test: diff --git a/.gitignore b/.gitignore index 01a3a03ff..c40a76fd0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ /lotus-chainwatch /lotus-shed /lotus-sim +/lotus-provider /lotus-townhall /lotus-fountain /lotus-stats diff --git a/CHANGELOG.md b/CHANGELOG.md index 6866631d0..6fb092922 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ The full list of [protocol improvements delivered in the network upgrade can be ## ☢️ Upgrade Warnings ☢️ - Read through the [changelog of the mandatory v1.24.0 release](https://github.com/filecoin-project/lotus/releases/tag/v1.24.0). Especially the `Migration` and `v12 Builtin Actor Bundle` sections. -- Please remove and clone a new Lotus repo (`git clone https://github.com/filecoin-project/lotus.git`) when upgrading to this release. +- Please remove and clone a new Lotus repo (`git clone https://github.com/filecoin-project/lotus.git`) when upgrading to this release. - This feature release requires a minimum Go version of v1.20.7 or higher to successfully build Lotus. Go version 1.21.x is not supported yet. - EthRPC providers, please check out the [new tracing API to Lotus RPC](https://github.com/filecoin-project/lotus/pull/11100) @@ -190,7 +190,7 @@ account bafk2bzaceboftg75mdiba7xbo2i3uvgtca4brhnr3u5ptihonixgpnrvhpxoa init bafk2bzacebllyegx5r6lggf6ymyetbp7amacwpuxakhtjvjtvoy2bfkzk3vms ``` -## Migration +## Migration We are expecting a heavier than normal state migration for this upgrade due to the amount of state changes introduced for miner sector info. (This is a similar migration as the Shark upgrade, however, we have introduced a couple of migration performance optimizations since then for a smoother upgrade experience.) @@ -209,7 +209,7 @@ You can check out the [tutorial for benchmarking the network migration here.](ht ## BREAKING CHANGE -There is a new protocol limit on how many partition could be submited in one PoSt - if you have any customized tooling for batching PoSts, please update accordingly. +There is a new protocol limit on how many partition could be submited in one PoSt - if you have any customized tooling for batching PoSts, please update accordingly. 
- feat: limit PoSted partitions to 3 ([filecoin-project/lotus#11327](https://github.com/filecoin-project/lotus/pull/11327)) ## New features @@ -221,7 +221,7 @@ There is a new protocol limit on how many partition could be submited in one PoS ## Improvements - Backport: feat: sealing: Switch to calling PreCommitSectorBatch2 ([filecoin-project/lotus#11215](https://github.com/filecoin-project/lotus/pull/11215)) -- updated the boostrap nodes +- updated the boostrap nodes ## Dependencies - github.com/filecoin-project/go-amt-ipld/v4 (v4.0.0 -> v4.2.0) @@ -231,9 +231,9 @@ There is a new protocol limit on how many partition could be submited in one PoS - chore: deps: update libp2p to v0.30.0 #11434 -## Snapshots +## Snapshots -The [Forest team](https://filecoinproject.slack.com/archives/C029LPZ5N73) at Chainsafe has launched a brand new lightweight snapshot service that is backed up by forest nodes! This is a great alternative service along with the fil-infra one, and it is compatible with lotus! We recommend lotus users to check it out [here](https://docs.filecoin.io/networks/mainnet#resources)! +The [Forest team](https://filecoinproject.slack.com/archives/C029LPZ5N73) at Chainsafe has launched a brand new lightweight snapshot service that is backed up by forest nodes! This is a great alternative service along with the fil-infra one, and it is compatible with lotus! We recommend lotus users to check it out [here](https://docs.filecoin.io/networks/mainnet#resources)! diff --git a/Dockerfile b/Dockerfile index 00930fb0f..c9750a71f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -109,6 +109,7 @@ COPY --from=lotus-builder /opt/filecoin/lotus-wallet /usr/local/bin/ COPY --from=lotus-builder /opt/filecoin/lotus-gateway /usr/local/bin/ COPY --from=lotus-builder /opt/filecoin/lotus-miner /usr/local/bin/ COPY --from=lotus-builder /opt/filecoin/lotus-worker /usr/local/bin/ +COPY --from=lotus-builder /opt/filecoin/lotus-provider /usr/local/bin/ COPY --from=lotus-builder /opt/filecoin/lotus-stats /usr/local/bin/ COPY --from=lotus-builder /opt/filecoin/lotus-fountain /usr/local/bin/ @@ -117,11 +118,13 @@ RUN mkdir /var/lib/lotus RUN mkdir /var/lib/lotus-miner RUN mkdir /var/lib/lotus-worker RUN mkdir /var/lib/lotus-wallet +RUN mkdir /var/lib/lotus-provider RUN chown fc: /var/tmp/filecoin-proof-parameters RUN chown fc: /var/lib/lotus RUN chown fc: /var/lib/lotus-miner RUN chown fc: /var/lib/lotus-worker RUN chown fc: /var/lib/lotus-wallet +RUN chown fc: /var/lib/lotus-provider VOLUME /var/tmp/filecoin-proof-parameters @@ -129,6 +132,7 @@ VOLUME /var/lib/lotus VOLUME /var/lib/lotus-miner VOLUME /var/lib/lotus-worker VOLUME /var/lib/lotus-wallet +VOLUME /var/lib/lotus-provider EXPOSE 1234 EXPOSE 2345 diff --git a/Makefile b/Makefile index b94c13c0d..68d97227b 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ CLEAN+=build/.update-modules deps: $(BUILD_DEPS) .PHONY: deps -build-devnets: build lotus-seed lotus-shed +build-devnets: build lotus-seed lotus-shed lotus-provider .PHONY: build-devnets debug: GOFLAGS+=-tags=debug @@ -97,6 +97,13 @@ lotus-miner: $(BUILD_DEPS) .PHONY: lotus-miner BINS+=lotus-miner +lotus-provider: $(BUILD_DEPS) + rm -f lotus-provider + $(GOCC) build $(GOFLAGS) -o lotus-provider ./cmd/lotus-provider + +lp2k: GOFLAGS+=-tags=2k +lp2k: lotus-provider + lotus-worker: $(BUILD_DEPS) rm -f lotus-worker $(GOCC) build $(GOFLAGS) -o lotus-worker ./cmd/lotus-worker @@ -115,13 +122,13 @@ lotus-gateway: $(BUILD_DEPS) .PHONY: lotus-gateway BINS+=lotus-gateway -build: lotus lotus-miner lotus-worker +build: 
lotus lotus-miner lotus-worker @[[ $$(type -P "lotus") ]] && echo "Caution: you have \ an existing lotus binary in your PATH. This may cause problems if you don't run 'sudo make install'" || true .PHONY: build -install: install-daemon install-miner install-worker +install: install-daemon install-miner install-worker install-provider install-daemon: install -C ./lotus /usr/local/bin/lotus @@ -129,6 +136,9 @@ install-daemon: install-miner: install -C ./lotus-miner /usr/local/bin/lotus-miner +install-provider: + install -C ./lotus-provider /usr/local/bin/lotus-provider + install-worker: install -C ./lotus-worker /usr/local/bin/lotus-worker @@ -144,6 +154,9 @@ uninstall-daemon: uninstall-miner: rm -f /usr/local/bin/lotus-miner +uninstall-provider: + rm -f /usr/local/bin/lotus-provider + uninstall-worker: rm -f /usr/local/bin/lotus-worker @@ -241,6 +254,14 @@ install-miner-service: install-miner install-daemon-service @echo @echo "lotus-miner service installed. Don't forget to run 'sudo systemctl start lotus-miner' to start it and 'sudo systemctl enable lotus-miner' for it to be enabled on startup." +install-provider-service: install-provider install-daemon-service + mkdir -p /etc/systemd/system + mkdir -p /var/log/lotus + install -C -m 0644 ./scripts/lotus-provider.service /etc/systemd/system/lotus-provider.service + systemctl daemon-reload + @echo + @echo "lotus-provider service installed. Don't forget to run 'sudo systemctl start lotus-provider' to start it and 'sudo systemctl enable lotus-provider' for it to be enabled on startup." + install-main-services: install-miner-service install-all-services: install-main-services @@ -259,6 +280,12 @@ clean-miner-service: rm -f /etc/systemd/system/lotus-miner.service systemctl daemon-reload +clean-provider-service: + -systemctl stop lotus-provider + -systemctl disable lotus-provider + rm -f /etc/systemd/system/lotus-provider.service + systemctl daemon-reload + clean-main-services: clean-daemon-service clean-all-services: clean-main-services @@ -294,7 +321,8 @@ actors-code-gen: $(GOCC) run ./chain/actors/agen $(GOCC) fmt ./... 
-actors-gen: actors-code-gen fiximports +actors-gen: actors-code-gen + ./scripts/fiximports .PHONY: actors-gen bundle-gen: @@ -354,21 +382,23 @@ docsgen-openrpc-gateway: docsgen-openrpc-bin fiximports: ./scripts/fiximports -gen: actors-code-gen type-gen cfgdoc-gen docsgen api-gen circleci fiximports +gen: actors-code-gen type-gen cfgdoc-gen docsgen api-gen circleci + ./scripts/fiximports @echo ">>> IF YOU'VE MODIFIED THE CLI OR CONFIG, REMEMBER TO ALSO RUN 'make docsgen-cli'" .PHONY: gen jen: gen -snap: lotus lotus-miner lotus-worker +snap: lotus lotus-miner lotus-worker lotus-provider snapcraft # snapcraft upload ./lotus_*.snap # separate from gen because it needs binaries -docsgen-cli: lotus lotus-miner lotus-worker +docsgen-cli: lotus lotus-miner lotus-worker lotus-provider python3 ./scripts/generate-lotus-cli.py ./lotus config default > documentation/en/default-lotus-config.toml ./lotus-miner config default > documentation/en/default-lotus-miner-config.toml + ./lotus-provider config default > documentation/en/default-lotus-provider-config.toml .PHONY: docsgen-cli print-%: diff --git a/api/api_lp.go b/api/api_lp.go new file mode 100644 index 000000000..8b58379f8 --- /dev/null +++ b/api/api_lp.go @@ -0,0 +1,10 @@ +package api + +import "context" + +type LotusProvider interface { + Version(context.Context) (Version, error) //perm:admin + + // Trigger shutdown + Shutdown(context.Context) error //perm:admin +} diff --git a/api/client/client.go b/api/client/client.go index 8b159c5b1..4d51221f9 100644 --- a/api/client/client.go +++ b/api/client/client.go @@ -15,6 +15,16 @@ import ( "github.com/filecoin-project/lotus/lib/rpcenc" ) +// NewProviderRpc creates a new http jsonrpc client. +func NewProviderRpc(ctx context.Context, addr string, requestHeader http.Header) (api.LotusProvider, jsonrpc.ClientCloser, error) { + var res v1api.LotusProviderStruct + + closer, err := jsonrpc.NewMergeClient(ctx, addr, "Filecoin", + api.GetInternalStructs(&res), requestHeader, jsonrpc.WithErrors(api.RPCErrors)) + + return &res, closer, err +} + // NewCommonRPCV0 creates a new http jsonrpc client. 
func NewCommonRPCV0(ctx context.Context, addr string, requestHeader http.Header) (api.CommonNet, jsonrpc.ClientCloser, error) { var res v0api.CommonNetStruct diff --git a/api/permissioned.go b/api/permissioned.go index 72d2239ee..f189cd78f 100644 --- a/api/permissioned.go +++ b/api/permissioned.go @@ -41,6 +41,12 @@ func PermissionedWorkerAPI(a Worker) Worker { return &out } +func PermissionedAPI[T, P any](a T) *P { + var out P + permissionedProxies(a, &out) + return &out +} + func PermissionedWalletAPI(a Wallet) Wallet { var out WalletStruct permissionedProxies(a, &out) diff --git a/api/proxy_gen.go b/api/proxy_gen.go index 8adcbc189..6627a5afe 100644 --- a/api/proxy_gen.go +++ b/api/proxy_gen.go @@ -831,6 +831,19 @@ type GatewayMethods struct { type GatewayStub struct { } +type LotusProviderStruct struct { + Internal LotusProviderMethods +} + +type LotusProviderMethods struct { + Shutdown func(p0 context.Context) error `perm:"admin"` + + Version func(p0 context.Context) (Version, error) `perm:"admin"` +} + +type LotusProviderStub struct { +} + type NetStruct struct { Internal NetMethods } @@ -5214,6 +5227,28 @@ func (s *GatewayStub) Web3ClientVersion(p0 context.Context) (string, error) { return "", ErrNotSupported } +func (s *LotusProviderStruct) Shutdown(p0 context.Context) error { + if s.Internal.Shutdown == nil { + return ErrNotSupported + } + return s.Internal.Shutdown(p0) +} + +func (s *LotusProviderStub) Shutdown(p0 context.Context) error { + return ErrNotSupported +} + +func (s *LotusProviderStruct) Version(p0 context.Context) (Version, error) { + if s.Internal.Version == nil { + return *new(Version), ErrNotSupported + } + return s.Internal.Version(p0) +} + +func (s *LotusProviderStub) Version(p0 context.Context) (Version, error) { + return *new(Version), ErrNotSupported +} + func (s *NetStruct) ID(p0 context.Context) (peer.ID, error) { if s.Internal.ID == nil { return *new(peer.ID), ErrNotSupported @@ -7442,6 +7477,7 @@ var _ CommonNet = new(CommonNetStruct) var _ EthSubscriber = new(EthSubscriberStruct) var _ FullNode = new(FullNodeStruct) var _ Gateway = new(GatewayStruct) +var _ LotusProvider = new(LotusProviderStruct) var _ Net = new(NetStruct) var _ Signable = new(SignableStruct) var _ StorageMiner = new(StorageMinerStruct) diff --git a/api/v1api/latest.go b/api/v1api/latest.go index aefb1543b..b8eeed2de 100644 --- a/api/v1api/latest.go +++ b/api/v1api/latest.go @@ -12,3 +12,5 @@ type RawFullNodeAPI FullNode func PermissionedFullAPI(a FullNode) FullNode { return api.PermissionedFullAPI(a) } + +type LotusProviderStruct = api.LotusProviderStruct diff --git a/api/version.go b/api/version.go index 9c2113578..e968bf93b 100644 --- a/api/version.go +++ b/api/version.go @@ -59,6 +59,8 @@ var ( MinerAPIVersion0 = newVer(1, 5, 0) WorkerAPIVersion0 = newVer(1, 7, 0) + + ProviderAPIVersion0 = newVer(1, 0, 0) ) //nolint:varcheck,deadcode diff --git a/blockstore/splitstore/splitstore_test.go b/blockstore/splitstore/splitstore_test.go index 63e77b47e..1b821654d 100644 --- a/blockstore/splitstore/splitstore_test.go +++ b/blockstore/splitstore/splitstore_test.go @@ -3,9 +3,9 @@ package splitstore import ( "context" + "crypto/rand" "errors" "fmt" - "math/rand" "sync" "sync/atomic" "testing" diff --git a/build/openrpc/full.json.gz b/build/openrpc/full.json.gz index 994266492..4e3f31fba 100644 Binary files a/build/openrpc/full.json.gz and b/build/openrpc/full.json.gz differ diff --git a/build/openrpc/gateway.json.gz b/build/openrpc/gateway.json.gz index 2a3e5c939..fe52522fd 100644 Binary files 
a/build/openrpc/gateway.json.gz and b/build/openrpc/gateway.json.gz differ diff --git a/build/openrpc/miner.json.gz b/build/openrpc/miner.json.gz index 6a82cdd95..30e2c6dd1 100644 Binary files a/build/openrpc/miner.json.gz and b/build/openrpc/miner.json.gz differ diff --git a/build/openrpc/worker.json.gz b/build/openrpc/worker.json.gz index 2e6e690e5..de03ea05b 100644 Binary files a/build/openrpc/worker.json.gz and b/build/openrpc/worker.json.gz differ diff --git a/chain/actors/policy/policy.go b/chain/actors/policy/policy.go index a0e4728fe..6d2b41154 100644 --- a/chain/actors/policy/policy.go +++ b/chain/actors/policy/policy.go @@ -867,6 +867,24 @@ func AggregatePreCommitNetworkFee(nwVer network.Version, aggregateSize int, base } } +var PoStToSealMap map[abi.RegisteredPoStProof]abi.RegisteredSealProof + +func init() { + PoStToSealMap = make(map[abi.RegisteredPoStProof]abi.RegisteredSealProof) + for sealProof, info := range abi.SealProofInfos { + PoStToSealMap[info.WinningPoStProof] = sealProof + PoStToSealMap[info.WindowPoStProof] = sealProof + } +} + +func GetSealProofFromPoStProof(postProof abi.RegisteredPoStProof) (abi.RegisteredSealProof, error) { + sealProof, exists := PoStToSealMap[postProof] + if !exists { + return 0, xerrors.New("no corresponding RegisteredSealProof for the given RegisteredPoStProof") + } + return sealProof, nil +} + func min(a, b int) int { if a < b { return a diff --git a/chain/actors/policy/policy.go.template b/chain/actors/policy/policy.go.template index 8803c97e6..d13518e0a 100644 --- a/chain/actors/policy/policy.go.template +++ b/chain/actors/policy/policy.go.template @@ -343,9 +343,26 @@ func AggregatePreCommitNetworkFee(nwVer network.Version, aggregateSize int, base } } +var PoStToSealMap map[abi.RegisteredPoStProof]abi.RegisteredSealProof +func init() { + PoStToSealMap = make(map[abi.RegisteredPoStProof]abi.RegisteredSealProof) + for sealProof, info := range abi.SealProofInfos { + PoStToSealMap[info.WinningPoStProof] = sealProof + PoStToSealMap[info.WindowPoStProof] = sealProof + } +} + +func GetSealProofFromPoStProof(postProof abi.RegisteredPoStProof) (abi.RegisteredSealProof, error) { + sealProof, exists := PoStToSealMap[postProof] + if !exists { + return 0, xerrors.New("no corresponding RegisteredSealProof for the given RegisteredPoStProof") + } + return sealProof, nil +} + func min(a, b int) int { if a < b { return a } return b -} \ No newline at end of file +} diff --git a/chain/consensus/common.go b/chain/consensus/common.go index 1d9fb3646..a7e5c40d2 100644 --- a/chain/consensus/common.go +++ b/chain/consensus/common.go @@ -362,7 +362,8 @@ func CreateBlockHeader(ctx context.Context, sm *stmgr.StateManager, pts *types.T var blsMsgCids, secpkMsgCids []cid.Cid var blsSigs []crypto.Signature nv := sm.GetNetworkVersion(ctx, bt.Epoch) - for _, msg := range bt.Messages { + for _, msgTmp := range bt.Messages { + msg := msgTmp if msg.Signature.Type == crypto.SigTypeBLS { blsSigs = append(blsSigs, msg.Signature) blsMessages = append(blsMessages, &msg.Message) diff --git a/chain/gen/genesis/miners.go b/chain/gen/genesis/miners.go index 0bac282d2..df8900cab 100644 --- a/chain/gen/genesis/miners.go +++ b/chain/gen/genesis/miners.go @@ -251,7 +251,8 @@ func SetupStorageMiners(ctx context.Context, cs *store.ChainStore, sys vm.Syscal } params := &markettypes.PublishStorageDealsParams{} - for _, preseal := range m.Sectors { + for _, presealTmp := range m.Sectors { + preseal := presealTmp preseal.Deal.VerifiedDeal = true preseal.Deal.EndEpoch = 
minerInfos[i].presealExp p := markettypes.ClientDealProposal{ diff --git a/chain/messagepool/block_proba_test.go b/chain/messagepool/block_proba_test.go index 6d121d222..2dc1dc25d 100644 --- a/chain/messagepool/block_proba_test.go +++ b/chain/messagepool/block_proba_test.go @@ -5,7 +5,6 @@ import ( "math" "math/rand" "testing" - "time" ) func TestBlockProbability(t *testing.T) { @@ -23,7 +22,6 @@ func TestBlockProbability(t *testing.T) { func TestWinnerProba(t *testing.T) { //stm: @OTHER_IMPLEMENTATION_BLOCK_PROB_002 - rand.Seed(time.Now().UnixNano()) const N = 1000000 winnerProba := noWinnersProb() sum := 0 diff --git a/chain/state/statetree.go b/chain/state/statetree.go index a0356f44c..61d7d500a 100644 --- a/chain/state/statetree.go +++ b/chain/state/statetree.go @@ -438,7 +438,8 @@ func (st *StateTree) Flush(ctx context.Context) (cid.Cid, error) { return cid.Undef, xerrors.Errorf("tried to flush state tree with snapshots on the stack") } - for addr, sto := range st.snaps.layers[0].actors { + for addr, stoTmp := range st.snaps.layers[0].actors { + sto := stoTmp if sto.Delete { if err := st.root.Delete(abi.AddrKey(addr)); err != nil { return cid.Undef, err diff --git a/chain/types/fil.go b/chain/types/fil.go index 60a2940c6..2a0ccb460 100644 --- a/chain/types/fil.go +++ b/chain/types/fil.go @@ -12,6 +12,9 @@ import ( type FIL BigInt func (f FIL) String() string { + if f.Int == nil { + return "0 FIL" + } return f.Unitless() + " FIL" } diff --git a/chain/vectors/gen/main.go b/chain/vectors/gen/main.go index ce9f1baf8..f4b7c82da 100644 --- a/chain/vectors/gen/main.go +++ b/chain/vectors/gen/main.go @@ -2,6 +2,7 @@ package main import ( "context" + crand "crypto/rand" "encoding/json" "fmt" "math/rand" @@ -145,7 +146,10 @@ func MakeUnsignedMessageVectors() []vectors.UnsignedMessageVector { } params := make([]byte, 32) - rand.Read(params) + _, err = crand.Read(params) + if err != nil { + panic(err) + } msg := &types.Message{ To: to, diff --git a/cli/helper.go b/cli/helper.go index 81a5bb033..fb1899e0a 100644 --- a/cli/helper.go +++ b/cli/helper.go @@ -1,6 +1,7 @@ package cli import ( + "errors" "fmt" "io" "os" @@ -8,7 +9,6 @@ import ( "syscall" ufcli "github.com/urfave/cli/v2" - "golang.org/x/xerrors" ) type PrintHelpErr struct { @@ -52,7 +52,7 @@ func RunApp(app *ufcli.App) { fmt.Fprintf(os.Stderr, "ERROR: %s\n\n", err) // nolint:errcheck } var phe *PrintHelpErr - if xerrors.As(err, &phe) { + if errors.As(err, &phe) { _ = ufcli.ShowCommandHelp(phe.Ctx, phe.Ctx.Command.Name) } os.Exit(1) diff --git a/cli/net.go b/cli/net.go index f25799e95..99ee92aef 100644 --- a/cli/net.go +++ b/cli/net.go @@ -847,7 +847,8 @@ var NetStatCmd = &cli.Command{ }) for _, stat := range stats { - printScope(&stat.stat, name+stat.name) + tmp := stat.stat + printScope(&tmp, name+stat.name) } } diff --git a/cli/util/api.go b/cli/util/api.go index 1d6928c3f..3602b752d 100644 --- a/cli/util/api.go +++ b/cli/util/api.go @@ -119,7 +119,7 @@ func GetAPIInfoMulti(ctx *cli.Context, t repo.RepoType) ([]APIInfo, error) { } } - return []APIInfo{}, fmt.Errorf("could not determine API endpoint for node type: %v", t.Type()) + return []APIInfo{}, fmt.Errorf("could not determine API endpoint for node type: %v. 
Try setting environment variable: %s", t.Type(), primaryEnv) } func GetAPIInfo(ctx *cli.Context, t repo.RepoType) (APIInfo, error) { @@ -164,6 +164,28 @@ func GetRawAPIMulti(ctx *cli.Context, t repo.RepoType, version string) ([]HttpHe return httpHeads, nil } +func GetRawAPIMultiV2(ctx *cli.Context, ainfoCfg []string, version string) ([]HttpHead, error) { + var httpHeads []HttpHead + + if len(ainfoCfg) == 0 { + return httpHeads, xerrors.Errorf("could not get API info: none configured. \nConsider getting base.toml with './lotus-provider config get base >/tmp/base.toml' \nthen adding \n[APIs] \n ChainApiInfo = [\" result_from lotus auth api-info --perm=admin \"]\n and updating it with './lotus-provider config set /tmp/base.toml'") + } + for _, i := range ainfoCfg { + ainfo := ParseApiInfo(i) + addr, err := ainfo.DialArgs(version) + if err != nil { + return httpHeads, xerrors.Errorf("could not get DialArgs: %w", err) + } + httpHeads = append(httpHeads, HttpHead{addr: addr, header: ainfo.AuthHeader()}) + } + + if IsVeryVerbose { + _, _ = fmt.Fprintf(ctx.App.Writer, "using raw API %s endpoint: %s\n", version, httpHeads[0].addr) + } + + return httpHeads, nil +} + func GetRawAPI(ctx *cli.Context, t repo.RepoType, version string) (string, http.Header, error) { heads, err := GetRawAPIMulti(ctx, t, version) if err != nil { @@ -393,6 +415,68 @@ func GetFullNodeAPIV1(ctx *cli.Context, opts ...GetFullNodeOption) (v1api.FullNo return &v1API, finalCloser, nil } +func GetFullNodeAPIV1LotusProvider(ctx *cli.Context, ainfoCfg []string, opts ...GetFullNodeOption) (v1api.FullNode, jsonrpc.ClientCloser, error) { + if tn, ok := ctx.App.Metadata["testnode-full"]; ok { + return tn.(v1api.FullNode), func() {}, nil + } + + var options GetFullNodeOptions + for _, opt := range opts { + opt(&options) + } + + var rpcOpts []jsonrpc.Option + if options.ethSubHandler != nil { + rpcOpts = append(rpcOpts, jsonrpc.WithClientHandler("Filecoin", options.ethSubHandler), jsonrpc.WithClientHandlerAlias("eth_subscription", "Filecoin.EthSubscription")) + } + + heads, err := GetRawAPIMultiV2(ctx, ainfoCfg, "v1") + if err != nil { + return nil, nil, err + } + + if IsVeryVerbose { + _, _ = fmt.Fprintln(ctx.App.Writer, "using full node API v1 endpoint:", heads[0].addr) + } + + var fullNodes []api.FullNode + var closers []jsonrpc.ClientCloser + + for _, head := range heads { + v1api, closer, err := client.NewFullNodeRPCV1(ctx.Context, head.addr, head.header, rpcOpts...) 
+ if err != nil { + log.Warnf("Not able to establish connection to node with addr: %s", head.addr) + continue + } + fullNodes = append(fullNodes, v1api) + closers = append(closers, closer) + } + + // When running in cluster mode and trying to establish connections to multiple nodes, fail + // if less than 2 lotus nodes are actually running + if len(heads) > 1 && len(fullNodes) < 2 { + return nil, nil, xerrors.Errorf("Not able to establish connection to more than a single node") + } + + finalCloser := func() { + for _, c := range closers { + c() + } + } + + var v1API api.FullNodeStruct + FullNodeProxy(fullNodes, &v1API) + + v, err := v1API.Version(ctx.Context) + if err != nil { + return nil, nil, err + } + if !v.APIVersion.EqMajorMinor(api.FullAPIVersion1) { + return nil, nil, xerrors.Errorf("Remote API version didn't match (expected %s, remote %s)", api.FullAPIVersion1, v.APIVersion) + } + return &v1API, finalCloser, nil +} + type GetStorageMinerOptions struct { PreferHttp bool } diff --git a/cmd/lotus-bench/main.go b/cmd/lotus-bench/main.go index 1db788498..7d3c0cde0 100644 --- a/cmd/lotus-bench/main.go +++ b/cmd/lotus-bench/main.go @@ -3,10 +3,10 @@ package main import ( "bytes" "context" + "crypto/rand" "encoding/json" "fmt" "math/big" - "math/rand" "os" "path/filepath" "sync" @@ -547,7 +547,10 @@ var sealBenchCmd = &cli.Command{ } var challenge [32]byte - rand.Read(challenge[:]) + _, err = rand.Read(challenge[:]) + if err != nil { + return err + } beforePost := time.Now() @@ -777,9 +780,7 @@ func runSeals(sb *ffiwrapper.Sealer, sbfs *basicfs.Provider, numSectors int, par start := time.Now() log.Infof("[%d] Writing piece into sector...", i) - r := rand.New(rand.NewSource(100 + int64(i))) - - pi, err := sb.AddPiece(context.TODO(), sid, nil, abi.PaddedPieceSize(sectorSize).Unpadded(), r) + pi, err := sb.AddPiece(context.TODO(), sid, nil, abi.PaddedPieceSize(sectorSize).Unpadded(), rand.Reader) if err != nil { return nil, nil, err } diff --git a/cmd/lotus-miner/init.go b/cmd/lotus-miner/init.go index c109e85b9..1b76960e9 100644 --- a/cmd/lotus-miner/init.go +++ b/cmd/lotus-miner/init.go @@ -463,7 +463,7 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api v1api.FullNode wsts := statestore.New(namespace.Wrap(mds, modules.WorkerCallsPrefix)) smsts := statestore.New(namespace.Wrap(mds, modules.ManagerWorkPrefix)) - si := paths.NewIndex(nil) + si := paths.NewMemIndex(nil) lstor, err := paths.NewLocal(ctx, lr, si, nil) if err != nil { diff --git a/cmd/lotus-miner/proving.go b/cmd/lotus-miner/proving.go index 3ecc58ba7..2fc1427b5 100644 --- a/cmd/lotus-miner/proving.go +++ b/cmd/lotus-miner/proving.go @@ -559,7 +559,8 @@ var provingCheckProvableCmd = &cli.Command{ for parIdx, par := range partitions { sectors := make(map[abi.SectorNumber]struct{}) - sectorInfos, err := api.StateMinerSectors(ctx, addr, &par.LiveSectors, types.EmptyTSK) + tmp := par.LiveSectors + sectorInfos, err := api.StateMinerSectors(ctx, addr, &tmp, types.EmptyTSK) if err != nil { return err } diff --git a/cmd/lotus-provider/config.go b/cmd/lotus-provider/config.go new file mode 100644 index 000000000..5bd681429 --- /dev/null +++ b/cmd/lotus-provider/config.go @@ -0,0 +1,259 @@ +package main + +import ( + "context" + "database/sql" + "errors" + "fmt" + "io" + "os" + "path" + "strings" + + "github.com/BurntSushi/toml" + "github.com/urfave/cli/v2" + "golang.org/x/xerrors" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/node/config" +) + +var configCmd = 
&cli.Command{ + Name: "config", + Usage: "Manage node config by layers. The layer 'base' will always be applied. ", + Subcommands: []*cli.Command{ + configDefaultCmd, + configSetCmd, + configGetCmd, + configListCmd, + configViewCmd, + configRmCmd, + configMigrateCmd, + }, +} + +var configDefaultCmd = &cli.Command{ + Name: "default", + Aliases: []string{"defaults"}, + Usage: "Print default node config", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "no-comment", + Usage: "don't comment default values", + }, + }, + Action: func(cctx *cli.Context) error { + comment := !cctx.Bool("no-comment") + cfg, err := getDefaultConfig(comment) + if err != nil { + return err + } + fmt.Print(cfg) + + return nil + }, +} + +func getDefaultConfig(comment bool) (string, error) { + c := config.DefaultLotusProvider() + cb, err := config.ConfigUpdate(c, nil, config.Commented(comment), config.DefaultKeepUncommented(), config.NoEnv()) + if err != nil { + return "", err + } + return string(cb), nil +} + +var configSetCmd = &cli.Command{ + Name: "set", + Aliases: []string{"add", "update", "create"}, + Usage: "Set a config layer or the base by providing a filename or stdin.", + ArgsUsage: "a layer's file name", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "title", + Usage: "title of the config layer (req'd for stdin)", + }, + }, + Action: func(cctx *cli.Context) error { + args := cctx.Args() + + db, err := makeDB(cctx) + if err != nil { + return err + } + + name := cctx.String("title") + var stream io.Reader = os.Stdin + if args.Len() != 1 { + if cctx.String("title") == "" { + return errors.New("must have a title for stdin, or a file name") + } + } else { + stream, err = os.Open(args.First()) + if err != nil { + return fmt.Errorf("cannot open file %s: %w", args.First(), err) + } + if name == "" { + name = strings.Split(path.Base(args.First()), ".")[0] + } + } + bytes, err := io.ReadAll(stream) + if err != nil { + return fmt.Errorf("cannot read stream/file %w", err) + } + + lp := config.DefaultLotusProvider() // ensure it's toml + _, err = toml.Decode(string(bytes), lp) + if err != nil { + return fmt.Errorf("cannot decode file: %w", err) + } + _ = lp + + _, err = db.Exec(context.Background(), + `INSERT INTO harmony_config (title, config) VALUES ($1, $2) + ON CONFLICT (title) DO UPDATE SET config = excluded.config`, name, string(bytes)) + if err != nil { + return fmt.Errorf("unable to save config layer: %w", err) + } + + fmt.Println("Layer " + name + " created/updated") + return nil + }, +} + +var configGetCmd = &cli.Command{ + Name: "get", + Aliases: []string{"cat", "show"}, + Usage: "Get a config layer by name. 
You may want to pipe the output to a file, or use 'less'", + ArgsUsage: "layer name", + Action: func(cctx *cli.Context) error { + args := cctx.Args() + if args.Len() != 1 { + return fmt.Errorf("want 1 layer arg, got %d", args.Len()) + } + db, err := makeDB(cctx) + if err != nil { + return err + } + + var cfg string + err = db.QueryRow(context.Background(), `SELECT config FROM harmony_config WHERE title=$1`, args.First()).Scan(&cfg) + if err != nil { + return err + } + fmt.Println(cfg) + + return nil + }, +} + +var configListCmd = &cli.Command{ + Name: "list", + Aliases: []string{"ls"}, + Usage: "List config layers you can get.", + Flags: []cli.Flag{}, + Action: func(cctx *cli.Context) error { + db, err := makeDB(cctx) + if err != nil { + return err + } + var res []string + err = db.Select(context.Background(), &res, `SELECT title FROM harmony_config ORDER BY title`) + if err != nil { + return fmt.Errorf("unable to read from db: %w", err) + } + for _, r := range res { + fmt.Println(r) + } + + return nil + }, +} + +var configRmCmd = &cli.Command{ + Name: "remove", + Aliases: []string{"rm", "del", "delete"}, + Usage: "Remove a named config layer.", + Flags: []cli.Flag{}, + Action: func(cctx *cli.Context) error { + args := cctx.Args() + if args.Len() != 1 { + return errors.New("must have exactly 1 arg for the layer name") + } + db, err := makeDB(cctx) + if err != nil { + return err + } + ct, err := db.Exec(context.Background(), `DELETE FROM harmony_config WHERE title=$1`, args.First()) + if err != nil { + return fmt.Errorf("unable to read from db: %w", err) + } + if ct == 0 { + return fmt.Errorf("no layer named %s", args.First()) + } + + return nil + }, +} +var configViewCmd = &cli.Command{ + Name: "interpret", + Aliases: []string{"view", "stacked", "stack"}, + Usage: "Interpret stacked config layers by this version of lotus-provider, with system-generated comments.", + ArgsUsage: "a list of layers to be interpreted as the final config", + Flags: []cli.Flag{ + &cli.StringSliceFlag{ + Name: "layers", + Usage: "comma or space separated list of layers to be interpreted", + Value: cli.NewStringSlice("base"), + Required: true, + }, + }, + Action: func(cctx *cli.Context) error { + db, err := makeDB(cctx) + if err != nil { + return err + } + lp, err := getConfig(cctx, db) + if err != nil { + return err + } + cb, err := config.ConfigUpdate(lp, config.DefaultLotusProvider(), config.Commented(true), config.DefaultKeepUncommented(), config.NoEnv()) + if err != nil { + return xerrors.Errorf("cannot interpret config: %w", err) + } + fmt.Println(string(cb)) + return nil + }, +} + +func getConfig(cctx *cli.Context, db *harmonydb.DB) (*config.LotusProviderConfig, error) { + lp := config.DefaultLotusProvider() + have := []string{} + layers := cctx.StringSlice("layers") + for _, layer := range layers { + text := "" + err := db.QueryRow(cctx.Context, `SELECT config FROM harmony_config WHERE title=$1`, layer).Scan(&text) + if err != nil { + if strings.Contains(err.Error(), sql.ErrNoRows.Error()) { + return nil, fmt.Errorf("missing layer '%s' ", layer) + } + if layer == "base" { + return nil, errors.New(`lotus-provider defaults to a layer named 'base'. 
+ Either use 'migrate' command or edit a base.toml and upload it with: lotus-provider config set base.toml`) + } + return nil, fmt.Errorf("could not read layer '%s': %w", layer, err) + } + meta, err := toml.Decode(text, &lp) + if err != nil { + return nil, fmt.Errorf("could not read layer, bad toml %s: %w", layer, err) + } + for _, k := range meta.Keys() { + have = append(have, strings.Join(k, " ")) + } + } + _ = have // FUTURE: verify that required fields are here. + // If config includes 3rd-party config, consider JSONSchema as a way that + // 3rd-parties can dynamically include config requirements and we can + // validate the config. Because of layering, we must validate @ startup. + return lp, nil +} diff --git a/cmd/lotus-provider/main.go b/cmd/lotus-provider/main.go new file mode 100644 index 000000000..19cc6f5f9 --- /dev/null +++ b/cmd/lotus-provider/main.go @@ -0,0 +1,160 @@ +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "runtime/debug" + "syscall" + + "github.com/fatih/color" + logging "github.com/ipfs/go-log/v2" + "github.com/urfave/cli/v2" + + "github.com/filecoin-project/lotus/build" + lcli "github.com/filecoin-project/lotus/cli" + cliutil "github.com/filecoin-project/lotus/cli/util" + "github.com/filecoin-project/lotus/lib/lotuslog" + "github.com/filecoin-project/lotus/lib/tracing" + "github.com/filecoin-project/lotus/node/repo" +) + +var log = logging.Logger("main") + +func SetupCloseHandler() { + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt, syscall.SIGTERM) + go func() { + <-c + fmt.Println("\r- Ctrl+C pressed in Terminal") + debug.PrintStack() + os.Exit(1) + }() +} + +func main() { + SetupCloseHandler() + + lotuslog.SetupLogLevels() + + local := []*cli.Command{ + //initCmd, + runCmd, + stopCmd, + configCmd, + testCmd, + //backupCmd, + //lcli.WithCategory("chain", actorCmd), + //lcli.WithCategory("storage", sectorsCmd), + //lcli.WithCategory("storage", provingCmd), + //lcli.WithCategory("storage", storageCmd), + //lcli.WithCategory("storage", sealingCmd), + } + + jaeger := tracing.SetupJaegerTracing("lotus") + defer func() { + if jaeger != nil { + _ = jaeger.ForceFlush(context.Background()) + } + }() + + for _, cmd := range local { + cmd := cmd + originBefore := cmd.Before + cmd.Before = func(cctx *cli.Context) error { + if jaeger != nil { + _ = jaeger.Shutdown(cctx.Context) + } + jaeger = tracing.SetupJaegerTracing("lotus/" + cmd.Name) + + if cctx.IsSet("color") { + color.NoColor = !cctx.Bool("color") + } + + if originBefore != nil { + return originBefore(cctx) + } + + return nil + } + } + + app := &cli.App{ + Name: "lotus-provider", + Usage: "Filecoin decentralized storage network provider", + Version: build.UserVersion(), + EnableBashCompletion: true, + Flags: []cli.Flag{ + &cli.BoolFlag{ + // examined in the Before above + Name: "color", + Usage: "use color in display output", + DefaultText: "depends on output being a TTY", + }, + &cli.StringFlag{ + Name: "panic-reports", + EnvVars: []string{"LOTUS_PANIC_REPORT_PATH"}, + Hidden: true, + Value: "~/.lotusprovider", // should follow --repo default + }, + &cli.StringFlag{ + Name: "db-host", + EnvVars: []string{"LOTUS_DB_HOST"}, + Usage: "Command separated list of hostnames for yugabyte cluster", + Value: "yugabyte", + }, + &cli.StringFlag{ + Name: "db-name", + EnvVars: []string{"LOTUS_DB_NAME", "LOTUS_HARMONYDB_HOSTS"}, + Value: "yugabyte", + }, + &cli.StringFlag{ + Name: "db-user", + EnvVars: []string{"LOTUS_DB_USER", "LOTUS_HARMONYDB_USERNAME"}, + Value: "yugabyte", + }, + 
&cli.StringFlag{ + Name: "db-password", + EnvVars: []string{"LOTUS_DB_PASSWORD", "LOTUS_HARMONYDB_PASSWORD"}, + Value: "yugabyte", + }, + &cli.StringFlag{ + Name: "db-port", + EnvVars: []string{"LOTUS_DB_PORT", "LOTUS_HARMONYDB_PORT"}, + Hidden: true, + Value: "5433", + }, + &cli.StringFlag{ + Name: "layers", + EnvVars: []string{"LOTUS_LAYERS", "LOTUS_CONFIG_LAYERS"}, + Value: "base", + }, + &cli.StringFlag{ + Name: FlagRepoPath, + EnvVars: []string{"LOTUS_REPO_PATH"}, + Value: "~/.lotusprovider", + }, + cliutil.FlagVeryVerbose, + }, + Commands: append(local, lcli.CommonCommands...), + Before: func(c *cli.Context) error { + return nil + }, + After: func(c *cli.Context) error { + if r := recover(); r != nil { + // Generate report in LOTUS_PATH and re-raise panic + build.GeneratePanicReport(c.String("panic-reports"), c.String(FlagRepoPath), c.App.Name) + panic(r) + } + return nil + }, + } + app.Setup() + app.Metadata["repoType"] = repo.Provider + lcli.RunApp(app) +} + +const ( + FlagRepoPath = "repo-path" +) diff --git a/cmd/lotus-provider/migrate.go b/cmd/lotus-provider/migrate.go new file mode 100644 index 000000000..819499402 --- /dev/null +++ b/cmd/lotus-provider/migrate.go @@ -0,0 +1,242 @@ +package main + +import ( + "bytes" + "context" + "encoding/base64" + "errors" + "fmt" + "os" + "path" + "strings" + + "github.com/BurntSushi/toml" + "github.com/fatih/color" + "github.com/ipfs/go-datastore" + "github.com/samber/lo" + "github.com/urfave/cli/v2" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + + cliutil "github.com/filecoin-project/lotus/cli/util" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/node/modules" + "github.com/filecoin-project/lotus/node/repo" +) + +var configMigrateCmd = &cli.Command{ + Name: "from-miner", + Usage: "Express a database config (for lotus-provider) from an existing miner.", + Description: "Express a database config (for lotus-provider) from an existing miner.", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: FlagMinerRepo, + Aliases: []string{FlagMinerRepoDeprecation}, + EnvVars: []string{"LOTUS_MINER_PATH", "LOTUS_STORAGE_PATH"}, + Value: "~/.lotusminer", + Usage: fmt.Sprintf("Specify miner repo path. flag(%s) and env(LOTUS_STORAGE_PATH) are DEPRECATION, will REMOVE SOON", FlagMinerRepoDeprecation), + }, + &cli.StringFlag{ + Name: "repo", + EnvVars: []string{"LOTUS_PATH"}, + Hidden: true, + Value: "~/.lotus", + }, + &cli.StringFlag{ + Name: "to-layer", + Aliases: []string{"t"}, + Usage: "The layer name for this data push. 
'base' is recommended for single-miner setup.", + }, + &cli.BoolFlag{ + Name: "overwrite", + Aliases: []string{"o"}, + Usage: "Use this with --to-layer to replace an existing layer", + }, + }, + Action: fromMiner, +} + +const ( + FlagMinerRepo = "miner-repo" +) + +const FlagMinerRepoDeprecation = "storagerepo" + +func fromMiner(cctx *cli.Context) (err error) { + ctx := context.Background() + cliCommandColor := color.New(color.FgHiBlue).SprintFunc() + configColor := color.New(color.FgHiGreen).SprintFunc() + + r, err := repo.NewFS(cctx.String(FlagMinerRepo)) + if err != nil { + return err + } + + ok, err := r.Exists() + if err != nil { + return err + } + + if !ok { + return fmt.Errorf("repo not initialized") + } + + lr, err := r.LockRO(repo.StorageMiner) + if err != nil { + return fmt.Errorf("locking repo: %w", err) + } + defer func() { _ = lr.Close() }() + + cfgNode, err := lr.Config() + if err != nil { + return fmt.Errorf("getting node config: %w", err) + } + smCfg := cfgNode.(*config.StorageMiner) + + db, err := harmonydb.NewFromConfig(smCfg.HarmonyDB) + if err != nil { + return fmt.Errorf("could not reach the database. Ensure the Miner config toml's HarmonyDB entry"+ + " is setup to reach Yugabyte correctly: %w", err) + } + + var titles []string + err = db.Select(ctx, &titles, `SELECT title FROM harmony_config WHERE LENGTH(config) > 0`) + if err != nil { + return fmt.Errorf("miner cannot reach the db. Ensure the config toml's HarmonyDB entry"+ + " is setup to reach Yugabyte correctly: %s", err.Error()) + } + name := cctx.String("to-layer") + if name == "" { + name = fmt.Sprintf("mig%d", len(titles)) + } else { + if lo.Contains(titles, name) && !cctx.Bool("overwrite") { + return errors.New("the overwrite flag is needed to replace existing layer: " + name) + } + } + msg := "Layer " + configColor(name) + ` created. 
` + + // Copy over identical settings: + + buf, err := os.ReadFile(path.Join(lr.Path(), "config.toml")) + if err != nil { + return fmt.Errorf("could not read config.toml: %w", err) + } + var lpCfg config.LotusProviderConfig + _, err = toml.Decode(string(buf), &lpCfg) + if err != nil { + return fmt.Errorf("could not decode toml: %w", err) + } + + // Populate Miner Address + mmeta, err := lr.Datastore(ctx, "/metadata") + if err != nil { + return xerrors.Errorf("opening miner metadata datastore: %w", err) + } + defer func() { + _ = mmeta.Close() + }() + + maddrBytes, err := mmeta.Get(ctx, datastore.NewKey("miner-address")) + if err != nil { + return xerrors.Errorf("getting miner address datastore entry: %w", err) + } + + addr, err := address.NewFromBytes(maddrBytes) + if err != nil { + return xerrors.Errorf("parsing miner actor address: %w", err) + } + + lpCfg.Addresses.MinerAddresses = []string{addr.String()} + + ks, err := lr.KeyStore() + if err != nil { + return xerrors.Errorf("keystore err: %w", err) + } + js, err := ks.Get(modules.JWTSecretName) + if err != nil { + return xerrors.Errorf("error getting JWTSecretName: %w", err) + } + + lpCfg.Apis.StorageRPCSecret = base64.StdEncoding.EncodeToString(js.PrivateKey) + + // Populate API Key + _, header, err := cliutil.GetRawAPI(cctx, repo.FullNode, "v0") + if err != nil { + return fmt.Errorf("cannot read API: %w", err) + } + + ainfo, err := cliutil.GetAPIInfo(&cli.Context{}, repo.FullNode) + if err != nil { + return xerrors.Errorf(`could not get API info for FullNode: %w + Set the environment variable to the value of "lotus auth api-info --perm=admin"`, err) + } + lpCfg.Apis.ChainApiInfo = []string{header.Get("Authorization")[7:] + ":" + ainfo.Addr} + + // Enable WindowPoSt + lpCfg.Subsystems.EnableWindowPost = true + msg += "\nBefore running lotus-provider, ensure any miner/worker answering of WindowPost is disabled by " + + "(on Miner) " + configColor("DisableBuiltinWindowPoSt=true") + " and (on Workers) not enabling windowpost on CLI or via " + + "environment variable " + configColor("LOTUS_WORKER_WINDOWPOST") + "." 
+ + // Express as configTOML + configTOML := &bytes.Buffer{} + if err = toml.NewEncoder(configTOML).Encode(lpCfg); err != nil { + return err + } + + if !lo.Contains(titles, "base") { + cfg, err := getDefaultConfig(true) + if err != nil { + return xerrors.Errorf("Cannot get default config: %w", err) + } + _, err = db.Exec(ctx, "INSERT INTO harmony_config (title, config) VALUES ('base', $1)", cfg) + + if err != nil { + return err + } + } + + if cctx.Bool("overwrite") { + i, err := db.Exec(ctx, "DELETE FROM harmony_config WHERE title=$1", name) + if i != 0 { + fmt.Println("Overwriting existing layer") + } + if err != nil { + fmt.Println("Got error while deleting existing layer: " + err.Error()) + } + } + + _, err = db.Exec(ctx, "INSERT INTO harmony_config (title, config) VALUES ($1, $2)", name, configTOML.String()) + if err != nil { + return err + } + + dbSettings := "" + def := config.DefaultStorageMiner().HarmonyDB + if def.Hosts[0] != smCfg.HarmonyDB.Hosts[0] { + dbSettings += ` --db-host="` + strings.Join(smCfg.HarmonyDB.Hosts, ",") + `"` + } + if def.Port != smCfg.HarmonyDB.Port { + dbSettings += " --db-port=" + smCfg.HarmonyDB.Port + } + if def.Username != smCfg.HarmonyDB.Username { + dbSettings += ` --db-user="` + smCfg.HarmonyDB.Username + `"` + } + if def.Password != smCfg.HarmonyDB.Password { + dbSettings += ` --db-password="` + smCfg.HarmonyDB.Password + `"` + } + if def.Database != smCfg.HarmonyDB.Database { + dbSettings += ` --db-name="` + smCfg.HarmonyDB.Database + `"` + } + + msg += ` +To work with the config: +` + cliCommandColor(`lotus-provider `+dbSettings+` config help `) + msg += ` +To run Lotus Provider: in its own machine or cgroup without other files, use the command: +` + cliCommandColor(`lotus-provider `+dbSettings+` run --layers="`+name+`"`) + fmt.Println(msg) + return nil +} diff --git a/cmd/lotus-provider/proving.go b/cmd/lotus-provider/proving.go new file mode 100644 index 000000000..577b5b5f9 --- /dev/null +++ b/cmd/lotus-provider/proving.go @@ -0,0 +1,198 @@ +package main + +import ( + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "os" + "time" + + "github.com/urfave/cli/v2" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-state-types/dline" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/provider" +) + +var testCmd = &cli.Command{ + Name: "test", + Usage: "Utility functions for testing", + Subcommands: []*cli.Command{ + //provingInfoCmd, + wdPostCmd, + }, +} + +var wdPostCmd = &cli.Command{ + Name: "window-post", + Aliases: []string{"wd", "windowpost", "wdpost"}, + Usage: "Compute a proof-of-spacetime for a sector (requires the sector to be pre-sealed). These will not send to the chain.", + Subcommands: []*cli.Command{ + wdPostHereCmd, + wdPostTaskCmd, + }, +} + +// wdPostTaskCmd writes to harmony_task and wdpost_partition_tasks, then waits for the result. +// It is intended to be used to test the windowpost scheduler. +// The end of the compute task puts the task_id onto wdpost_proofs, which is read by the submit task. +// The submit task will not send test tasks to the chain, and instead will write the result to harmony_test. +// The result is read by this command, and printed to stdout. +var wdPostTaskCmd = &cli.Command{ + Name: "task", + Aliases: []string{"scheduled", "schedule", "async", "asynchronous"}, + Usage: "Test the windowpost scheduler by running it on the next available lotus-provider. 
", + Flags: []cli.Flag{ + &cli.Uint64Flag{ + Name: "deadline", + Usage: "deadline to compute WindowPoSt for ", + Value: 0, + }, + &cli.StringSliceFlag{ + Name: "layers", + Usage: "list of layers to be interpreted (atop defaults). Default: base", + Value: cli.NewStringSlice("base"), + }, + }, + Action: func(cctx *cli.Context) error { + ctx := context.Background() + + deps, err := getDeps(ctx, cctx) + if err != nil { + return err + } + + ts, err := deps.full.ChainHead(ctx) + if err != nil { + return xerrors.Errorf("cannot get chainhead %w", err) + } + ht := ts.Height() + + addr, err := address.NewFromString(deps.cfg.Addresses.MinerAddresses[0]) + if err != nil { + return xerrors.Errorf("cannot get miner address %w", err) + } + maddr, err := address.IDFromAddress(addr) + if err != nil { + return xerrors.Errorf("cannot get miner id %w", err) + } + var id int64 + _, err = deps.db.BeginTransaction(ctx, func(tx *harmonydb.Tx) (commit bool, err error) { + err = tx.QueryRow(`INSERT INTO harmony_task (name, posted_time, added_by) VALUES ('WdPost', CURRENT_TIMESTAMP, 123) RETURNING id`).Scan(&id) + if err != nil { + log.Error("inserting harmony_task: ", err) + return false, xerrors.Errorf("inserting harmony_task: %w", err) + } + _, err = tx.Exec(`INSERT INTO wdpost_partition_tasks + (task_id, sp_id, proving_period_start, deadline_index, partition_index) VALUES ($1, $2, $3, $4, $5)`, + id, maddr, ht, cctx.Uint64("deadline"), 0) + if err != nil { + log.Error("inserting wdpost_partition_tasks: ", err) + return false, xerrors.Errorf("inserting wdpost_partition_tasks: %w", err) + } + _, err = tx.Exec("INSERT INTO harmony_test (task_id) VALUES ($1)", id) + if err != nil { + return false, xerrors.Errorf("inserting into harmony_tests: %w", err) + } + return true, nil + }) + if err != nil { + return xerrors.Errorf("writing SQL transaction: %w", err) + } + fmt.Printf("Inserted task %v. Waiting for success ", id) + var result sql.NullString + for { + time.Sleep(time.Second) + err = deps.db.QueryRow(ctx, `SELECT result FROM harmony_test WHERE task_id=$1`, id).Scan(&result) + if err != nil { + return xerrors.Errorf("reading result from harmony_test: %w", err) + } + if result.Valid { + break + } + fmt.Print(".") + } + log.Infof("Result:", result.String) + return nil + }, +} + +// This command is intended to be used to verify PoSt compute performance. +// It will not send any messages to the chain. Since it can compute any deadline, output may be incorrectly timed for the chain. +// The entire processing happens in this process while you wait. It does not use the scheduler. +var wdPostHereCmd = &cli.Command{ + Name: "here", + Aliases: []string{"cli"}, + Usage: "Compute WindowPoSt for performance and configuration testing.", + Description: `Note: This command is intended to be used to verify PoSt compute performance. +It will not send any messages to the chain. Since it can compute any deadline, output may be incorrectly timed for the chain.`, + ArgsUsage: "[deadline index]", + Flags: []cli.Flag{ + &cli.Uint64Flag{ + Name: "deadline", + Usage: "deadline to compute WindowPoSt for ", + Value: 0, + }, + &cli.StringSliceFlag{ + Name: "layers", + Usage: "list of layers to be interpreted (atop defaults). 
Default: base", + Value: cli.NewStringSlice("base"), + }, + &cli.StringFlag{ + Name: "storage-json", + Usage: "path to json file containing storage config", + Value: "~/.lotus-provider/storage.json", + }, + &cli.Uint64Flag{ + Name: "partition", + Usage: "partition to compute WindowPoSt for", + Value: 0, + }, + }, + Action: func(cctx *cli.Context) error { + + ctx := context.Background() + deps, err := getDeps(ctx, cctx) + if err != nil { + return err + } + + wdPostTask, wdPoStSubmitTask, derlareRecoverTask, err := provider.WindowPostScheduler(ctx, deps.cfg.Fees, deps.cfg.Proving, deps.full, deps.verif, deps.lw, nil, + deps.as, deps.maddrs, deps.db, deps.stor, deps.si, deps.cfg.Subsystems.WindowPostMaxTasks) + if err != nil { + return err + } + _, _ = wdPoStSubmitTask, derlareRecoverTask + + if len(deps.maddrs) == 0 { + return errors.New("no miners to compute WindowPoSt for") + } + head, err := deps.full.ChainHead(ctx) + if err != nil { + return xerrors.Errorf("failed to get chain head: %w", err) + } + + di := dline.NewInfo(head.Height(), cctx.Uint64("deadline"), 0, 0, 0, 10 /*challenge window*/, 0, 0) + + for _, maddr := range deps.maddrs { + out, err := wdPostTask.DoPartition(ctx, head, address.Address(maddr), di, cctx.Uint64("partition")) + if err != nil { + fmt.Println("Error computing WindowPoSt for miner", maddr, err) + continue + } + fmt.Println("Computed WindowPoSt for miner", maddr, ":") + err = json.NewEncoder(os.Stdout).Encode(out) + if err != nil { + fmt.Println("Could not encode WindowPoSt output for miner", maddr, err) + continue + } + } + + return nil + }, +} diff --git a/cmd/lotus-provider/rpc/rpc.go b/cmd/lotus-provider/rpc/rpc.go new file mode 100644 index 000000000..3ae3e2a1f --- /dev/null +++ b/cmd/lotus-provider/rpc/rpc.go @@ -0,0 +1,51 @@ +package rpc + +import ( + "context" + "net/http" + + "github.com/gorilla/mux" + + // logging "github.com/ipfs/go-log/v2" + "github.com/filecoin-project/go-jsonrpc" + "github.com/filecoin-project/go-jsonrpc/auth" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/lib/rpcenc" + "github.com/filecoin-project/lotus/metrics/proxy" +) + +//var log = logging.Logger("lp/rpc") + +func LotusProviderHandler( + authv func(ctx context.Context, token string) ([]auth.Permission, error), + remote http.HandlerFunc, + a api.LotusProvider, + permissioned bool) http.Handler { + mux := mux.NewRouter() + readerHandler, readerServerOpt := rpcenc.ReaderParamDecoder() + rpcServer := jsonrpc.NewServer(jsonrpc.WithServerErrors(api.RPCErrors), readerServerOpt) + + wapi := proxy.MetricedAPI[api.LotusProvider, api.LotusProviderStruct](a) + if permissioned { + wapi = api.PermissionedAPI[api.LotusProvider, api.LotusProviderStruct](wapi) + } + + rpcServer.Register("Filecoin", wapi) + rpcServer.AliasMethod("rpc.discover", "Filecoin.Discover") + + mux.Handle("/rpc/v0", rpcServer) + mux.Handle("/rpc/streams/v0/push/{uuid}", readerHandler) + mux.PathPrefix("/remote").HandlerFunc(remote) + mux.PathPrefix("/").Handler(http.DefaultServeMux) // pprof + + if !permissioned { + return mux + } + + ah := &auth.Handler{ + Verify: authv, + Next: mux.ServeHTTP, + } + return ah +} diff --git a/cmd/lotus-provider/run.go b/cmd/lotus-provider/run.go new file mode 100644 index 000000000..bf19ee537 --- /dev/null +++ b/cmd/lotus-provider/run.go @@ -0,0 +1,459 @@ +package main + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "net" + "net/http" + "os" + "strings" + "time" + + "github.com/gbrlsnchs/jwt/v3" + "github.com/gorilla/mux" + ds 
"github.com/ipfs/go-datastore" + dssync "github.com/ipfs/go-datastore/sync" + "github.com/pkg/errors" + "github.com/samber/lo" + "github.com/urfave/cli/v2" + "go.opencensus.io/stats" + "go.opencensus.io/tag" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-jsonrpc/auth" + "github.com/filecoin-project/go-statestore" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/build" + lcli "github.com/filecoin-project/lotus/cli" + cliutil "github.com/filecoin-project/lotus/cli/util" + "github.com/filecoin-project/lotus/cmd/lotus-provider/rpc" + "github.com/filecoin-project/lotus/journal" + "github.com/filecoin-project/lotus/journal/alerting" + "github.com/filecoin-project/lotus/journal/fsjournal" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/ulimit" + "github.com/filecoin-project/lotus/metrics" + "github.com/filecoin-project/lotus/node" + "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/node/modules/dtypes" + "github.com/filecoin-project/lotus/node/repo" + "github.com/filecoin-project/lotus/provider" + "github.com/filecoin-project/lotus/provider/lpmessage" + "github.com/filecoin-project/lotus/provider/lpwinning" + "github.com/filecoin-project/lotus/storage/ctladdr" + "github.com/filecoin-project/lotus/storage/paths" + "github.com/filecoin-project/lotus/storage/sealer" + "github.com/filecoin-project/lotus/storage/sealer/ffiwrapper" + "github.com/filecoin-project/lotus/storage/sealer/storiface" +) + +type stackTracer interface { + StackTrace() errors.StackTrace +} + +var runCmd = &cli.Command{ + Name: "run", + Usage: "Start a lotus provider process", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "listen", + Usage: "host address and port the worker api will listen on", + Value: "0.0.0.0:12300", + EnvVars: []string{"LOTUS_WORKER_LISTEN"}, + }, + &cli.BoolFlag{ + Name: "nosync", + Usage: "don't check full-node sync status", + }, + &cli.BoolFlag{ + Name: "halt-after-init", + Usage: "only run init, then return", + Hidden: true, + }, + &cli.BoolFlag{ + Name: "manage-fdlimit", + Usage: "manage open file limit", + Value: true, + }, + &cli.StringSliceFlag{ + Name: "layers", + Usage: "list of layers to be interpreted (atop defaults). 
Default: base", + Value: cli.NewStringSlice("base"), + }, + &cli.StringFlag{ + Name: "storage-json", + Usage: "path to json file containing storage config", + Value: "~/.lotus-provider/storage.json", + }, + &cli.StringFlag{ + Name: "journal", + Usage: "path to journal files", + Value: "~/.lotus-provider/", + }, + }, + Action: func(cctx *cli.Context) (err error) { + defer func() { + if err != nil { + if err, ok := err.(stackTracer); ok { + for _, f := range err.StackTrace() { + fmt.Printf("%+s:%d\n", f, f) + } + } + } + }() + if !cctx.Bool("enable-gpu-proving") { + err := os.Setenv("BELLMAN_NO_GPU", "true") + if err != nil { + return err + } + } + + ctx, _ := tag.New(lcli.DaemonContext(cctx), + tag.Insert(metrics.Version, build.BuildVersion), + tag.Insert(metrics.Commit, build.CurrentCommit), + tag.Insert(metrics.NodeType, "provider"), + ) + shutdownChan := make(chan struct{}) + { + var ctxclose func() + ctx, ctxclose = context.WithCancel(ctx) + go func() { + <-shutdownChan + ctxclose() + }() + } + // Register all metric views + /* + if err := view.Register( + metrics.MinerNodeViews..., + ); err != nil { + log.Fatalf("Cannot register the view: %v", err) + } + */ + // Set the metric to one so it is published to the exporter + stats.Record(ctx, metrics.LotusInfo.M(1)) + + if cctx.Bool("manage-fdlimit") { + if _, _, err := ulimit.ManageFdLimit(); err != nil { + log.Errorf("setting file descriptor limit: %s", err) + } + } + + deps, err := getDeps(ctx, cctx) + + if err != nil { + return err + } + cfg, db, full, verif, lw, as, maddrs, stor, si, localStore := deps.cfg, deps.db, deps.full, deps.verif, deps.lw, deps.as, deps.maddrs, deps.stor, deps.si, deps.localStore + + var activeTasks []harmonytask.TaskInterface + + sender, sendTask := lpmessage.NewSender(full, full, db) + activeTasks = append(activeTasks, sendTask) + + /////////////////////////////////////////////////////////////////////// + ///// Task Selection + /////////////////////////////////////////////////////////////////////// + { + + if cfg.Subsystems.EnableWindowPost { + wdPostTask, wdPoStSubmitTask, derlareRecoverTask, err := provider.WindowPostScheduler(ctx, cfg.Fees, cfg.Proving, full, verif, lw, sender, + as, maddrs, db, stor, si, cfg.Subsystems.WindowPostMaxTasks) + if err != nil { + return err + } + activeTasks = append(activeTasks, wdPostTask, wdPoStSubmitTask, derlareRecoverTask) + } + + if cfg.Subsystems.EnableWinningPost { + winPoStTask := lpwinning.NewWinPostTask(cfg.Subsystems.WinningPostMaxTasks, db, lw, verif, full, maddrs) + activeTasks = append(activeTasks, winPoStTask) + } + } + log.Infow("This lotus_provider instance handles", + "miner_addresses", maddrs, + "tasks", lo.Map(activeTasks, func(t harmonytask.TaskInterface, _ int) string { return t.TypeDetails().Name })) + + taskEngine, err := harmonytask.New(db, activeTasks, deps.listenAddr) + if err != nil { + return err + } + + defer taskEngine.GracefullyTerminate(time.Hour) + + fh := &paths.FetchHandler{Local: localStore, PfHandler: &paths.DefaultPartialFileHandler{}} + remoteHandler := func(w http.ResponseWriter, r *http.Request) { + if !auth.HasPerm(r.Context(), nil, api.PermAdmin) { + w.WriteHeader(401) + _ = json.NewEncoder(w).Encode(struct{ Error string }{"unauthorized: missing admin permission"}) + return + } + + fh.ServeHTTP(w, r) + } + // local APIs + { + // debugging + mux := mux.NewRouter() + mux.PathPrefix("/").Handler(http.DefaultServeMux) // pprof + mux.PathPrefix("/remote").HandlerFunc(remoteHandler) + + /*ah := &auth.Handler{ + Verify: authv, + Next: 
mux.ServeHTTP, + }*/ // todo + + } + + var authVerify func(context.Context, string) ([]auth.Permission, error) + { + privateKey, err := base64.StdEncoding.DecodeString(deps.cfg.Apis.StorageRPCSecret) + if err != nil { + return xerrors.Errorf("decoding storage rpc secret: %w", err) + } + authVerify = func(ctx context.Context, token string) ([]auth.Permission, error) { + var payload jwtPayload + if _, err := jwt.Verify([]byte(token), jwt.NewHS256(privateKey), &payload); err != nil { + return nil, xerrors.Errorf("JWT Verification failed: %w", err) + } + + return payload.Allow, nil + } + } + // Serve the RPC. + srv := &http.Server{ + Handler: rpc.LotusProviderHandler( + authVerify, + remoteHandler, + &ProviderAPI{deps, shutdownChan}, + true), + ReadHeaderTimeout: time.Minute * 3, + BaseContext: func(listener net.Listener) context.Context { + ctx, _ := tag.New(context.Background(), tag.Upsert(metrics.APIInterface, "lotus-worker")) + return ctx + }, + } + + go func() { + <-ctx.Done() + log.Warn("Shutting down...") + if err := srv.Shutdown(context.TODO()); err != nil { + log.Errorf("shutting down RPC server failed: %s", err) + } + log.Warn("Graceful shutdown successful") + }() + + // Monitor for shutdown. + // TODO provide a graceful shutdown API on shutdownChan + finishCh := node.MonitorShutdown(shutdownChan) //node.ShutdownHandler{Component: "rpc server", StopFunc: rpcStopper}, + //node.ShutdownHandler{Component: "provider", StopFunc: stop}, + + <-finishCh + return nil + }, +} + +func makeDB(cctx *cli.Context) (*harmonydb.DB, error) { + dbConfig := config.HarmonyDB{ + Username: cctx.String("db-user"), + Password: cctx.String("db-password"), + Hosts: strings.Split(cctx.String("db-host"), ","), + Database: cctx.String("db-name"), + Port: cctx.String("db-port"), + } + return harmonydb.NewFromConfig(dbConfig) +} + +type jwtPayload struct { + Allow []auth.Permission +} + +func StorageAuth(apiKey string) (sealer.StorageAuth, error) { + if apiKey == "" { + return nil, xerrors.Errorf("no api key provided") + } + + rawKey, err := base64.StdEncoding.DecodeString(apiKey) + if err != nil { + return nil, xerrors.Errorf("decoding api key: %w", err) + } + + key := jwt.NewHS256(rawKey) + + p := jwtPayload{ + Allow: []auth.Permission{"admin"}, + } + + token, err := jwt.Sign(&p, key) + if err != nil { + return nil, err + } + + headers := http.Header{} + headers.Add("Authorization", "Bearer "+string(token)) + return sealer.StorageAuth(headers), nil +} + +type Deps struct { + cfg *config.LotusProviderConfig + db *harmonydb.DB + full api.FullNode + verif storiface.Verifier + lw *sealer.LocalWorker + as *ctladdr.AddressSelector + maddrs []dtypes.MinerAddress + stor *paths.Remote + si *paths.DBIndex + localStore *paths.Local + listenAddr string +} + +func getDeps(ctx context.Context, cctx *cli.Context) (*Deps, error) { + // Open repo + + repoPath := cctx.String(FlagRepoPath) + fmt.Println("repopath", repoPath) + r, err := repo.NewFS(repoPath) + if err != nil { + return nil, err + } + + ok, err := r.Exists() + if err != nil { + return nil, err + } + if !ok { + if err := r.Init(repo.Provider); err != nil { + return nil, err + } + } + + db, err := makeDB(cctx) + if err != nil { + return nil, err + } + + /////////////////////////////////////////////////////////////////////// + ///// Dependency Setup + /////////////////////////////////////////////////////////////////////// + + // The config feeds into task runners & their helpers + cfg, err := getConfig(cctx, db) + if err != nil { + return nil, err + } + + 
log.Debugw("config", "config", cfg) + + var verif storiface.Verifier = ffiwrapper.ProofVerifier + + as, err := provider.AddressSelector(&cfg.Addresses)() + if err != nil { + return nil, err + } + + de, err := journal.ParseDisabledEvents(cfg.Journal.DisabledEvents) + if err != nil { + return nil, err + } + j, err := fsjournal.OpenFSJournalPath(cctx.String("journal"), de) + if err != nil { + return nil, err + } + + full, fullCloser, err := cliutil.GetFullNodeAPIV1LotusProvider(cctx, cfg.Apis.ChainApiInfo) + if err != nil { + return nil, err + } + + go func() { + select { + case <-ctx.Done(): + fullCloser() + _ = j.Close() + } + }() + sa, err := StorageAuth(cfg.Apis.StorageRPCSecret) + if err != nil { + return nil, xerrors.Errorf(`'%w' while parsing the config toml's + [Apis] + StorageRPCSecret=%v +Get it with: jq .PrivateKey ~/.lotus-miner/keystore/MF2XI2BNNJ3XILLQOJUXMYLUMU`, err, cfg.Apis.StorageRPCSecret) + } + + al := alerting.NewAlertingSystem(j) + si := paths.NewDBIndex(al, db) + bls := &paths.BasicLocalStorage{ + PathToJSON: cctx.String("storage-json"), + } + + listenAddr := cctx.String("listen") + const unspecifiedAddress = "0.0.0.0" + addressSlice := strings.Split(listenAddr, ":") + if ip := net.ParseIP(addressSlice[0]); ip != nil { + if ip.String() == unspecifiedAddress { + rip, err := db.GetRoutableIP() + if err != nil { + return nil, err + } + listenAddr = rip + ":" + addressSlice[1] + } + } + localStore, err := paths.NewLocal(ctx, bls, si, []string{"http://" + listenAddr + "/remote"}) + if err != nil { + return nil, err + } + + stor := paths.NewRemote(localStore, si, http.Header(sa), 10, &paths.DefaultPartialFileHandler{}) + + wstates := statestore.New(dssync.MutexWrap(ds.NewMapDatastore())) + + // todo localWorker isn't the abstraction layer we want to use here, we probably want to go straight to ffiwrapper + // maybe with a lotus-provider specific abstraction. LocalWorker does persistent call tracking which we probably + // don't need (ehh.. 
maybe we do, the async callback system may actually work decently well with harmonytask) + lw := sealer.NewLocalWorker(sealer.WorkerConfig{}, stor, localStore, si, nil, wstates) + + var maddrs []dtypes.MinerAddress + for _, s := range cfg.Addresses.MinerAddresses { + addr, err := address.NewFromString(s) + if err != nil { + return nil, err + } + maddrs = append(maddrs, dtypes.MinerAddress(addr)) + } + + return &Deps{ // lint: intentionally not-named so it will fail if one is forgotten + cfg, + db, + full, + verif, + lw, + as, + maddrs, + stor, + si, + localStore, + listenAddr, + }, nil + +} + +type ProviderAPI struct { + *Deps + ShutdownChan chan struct{} +} + +func (p *ProviderAPI) Version(context.Context) (api.Version, error) { + return api.ProviderAPIVersion0, nil +} + +// Trigger shutdown +func (p *ProviderAPI) Shutdown(context.Context) error { + close(p.ShutdownChan) + return nil +} diff --git a/cmd/lotus-provider/stop.go b/cmd/lotus-provider/stop.go new file mode 100644 index 000000000..3376d762a --- /dev/null +++ b/cmd/lotus-provider/stop.go @@ -0,0 +1,29 @@ +package main + +import ( + _ "net/http/pprof" + + "github.com/urfave/cli/v2" + + lcli "github.com/filecoin-project/lotus/cli" +) + +var stopCmd = &cli.Command{ + Name: "stop", + Usage: "Stop a running lotus provider", + Flags: []cli.Flag{}, + Action: func(cctx *cli.Context) error { + api, closer, err := lcli.GetAPI(cctx) + if err != nil { + return err + } + defer closer() + + err = api.Shutdown(lcli.ReqContext(cctx)) + if err != nil { + return err + } + + return nil + }, +} diff --git a/cmd/lotus-shed/terminations.go b/cmd/lotus-shed/terminations.go index c5f35995a..563c1ba3a 100644 --- a/cmd/lotus-shed/terminations.go +++ b/cmd/lotus-shed/terminations.go @@ -157,7 +157,8 @@ var terminationsCmd = &cli.Command{ } for _, t := range termParams.Terminations { - sectors, err := minerSt.LoadSectors(&t.Sectors) + tmp := t.Sectors + sectors, err := minerSt.LoadSectors(&tmp) if err != nil { return err } diff --git a/cmd/lotus-sim/simulation/stages/funding_stage.go b/cmd/lotus-sim/simulation/stages/funding_stage.go index f75a9910d..4ce4afae1 100644 --- a/cmd/lotus-sim/simulation/stages/funding_stage.go +++ b/cmd/lotus-sim/simulation/stages/funding_stage.go @@ -166,7 +166,8 @@ func (fs *FundingStage) PackMessages(ctx context.Context, bb *blockbuilder.Block ) }() - for _, actor := range targets { + for _, actorTmp := range targets { + actor := actorTmp switch { case builtin.IsAccountActor(actor.Code): if _, err := bb.PushMessage(&types.Message{ diff --git a/cmd/lotus-worker/sealworker/rpc.go b/cmd/lotus-worker/sealworker/rpc.go index 97f78942e..4e720ef64 100644 --- a/cmd/lotus-worker/sealworker/rpc.go +++ b/cmd/lotus-worker/sealworker/rpc.go @@ -26,7 +26,11 @@ import ( var log = logging.Logger("sealworker") -func WorkerHandler(authv func(ctx context.Context, token string) ([]auth.Permission, error), remote http.HandlerFunc, a api.Worker, permissioned bool) http.Handler { +func WorkerHandler( + authv func(ctx context.Context, token string) ([]auth.Permission, error), + remote http.HandlerFunc, + a api.Worker, + permissioned bool) http.Handler { mux := mux.NewRouter() readerHandler, readerServerOpt := rpcenc.ReaderParamDecoder() rpcServer := jsonrpc.NewServer(jsonrpc.WithServerErrors(api.RPCErrors), readerServerOpt) diff --git a/cmd/lotus/daemon.go b/cmd/lotus/daemon.go index 44da4139a..5d8096d1f 100644 --- a/cmd/lotus/daemon.go +++ b/cmd/lotus/daemon.go @@ -269,6 +269,26 @@ var DaemonCmd = &cli.Command{ } } + if 
cctx.Bool("remove-existing-chain") { + lr, err := repo.NewFS(cctx.String("repo")) + if err != nil { + return xerrors.Errorf("error opening fs repo: %w", err) + } + + exists, err := lr.Exists() + if err != nil { + return err + } + if !exists { + return xerrors.Errorf("lotus repo doesn't exist") + } + + err = removeExistingChain(cctx, lr) + if err != nil { + return err + } + } + chainfile := cctx.String("import-chain") snapshot := cctx.String("import-snapshot") willImportChain := false diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml index f0e3fa3f0..a65e82e95 100644 --- a/documentation/en/default-lotus-miner-config.toml +++ b/documentation/en/default-lotus-miner-config.toml @@ -145,6 +145,14 @@ # env var: LOTUS_SUBSYSTEMS_ENABLEMARKETS #EnableMarkets = false + # When enabled, the sector index will reside in an external database + # as opposed to the local KV store in the miner process + # This is useful to allow workers to bypass the lotus miner to access sector information + # + # type: bool + # env var: LOTUS_SUBSYSTEMS_ENABLESECTORINDEXDB + #EnableSectorIndexDB = false + # type: string # env var: LOTUS_SUBSYSTEMS_SEALERAPIINFO #SealerApiInfo = "" @@ -153,6 +161,31 @@ # env var: LOTUS_SUBSYSTEMS_SECTORINDEXAPIINFO #SectorIndexApiInfo = "" + # When window post is enabled, the miner will automatically submit window post proofs + # for all sectors that are eligible for window post + # IF WINDOW POST IS DISABLED, THE MINER WILL NOT SUBMIT WINDOW POST PROOFS + # THIS WILL RESULT IN FAULTS AND PENALTIES IF NO OTHER MECHANISM IS RUNNING + # TO SUBMIT WINDOW POST PROOFS. + # Note: This option is entirely disabling the window post scheduler, + # not just the builtin PoSt computation like Proving.DisableBuiltinWindowPoSt. + # This option will stop lotus-miner from performing any actions related + # to window post, including scheduling, submitting proofs, and recovering + # sectors. + # + # type: bool + # env var: LOTUS_SUBSYSTEMS_DISABLEWINDOWPOST + #DisableWindowPoSt = false + + # When winning post is disabled, the miner process will NOT attempt to mine + # blocks. This should only be set when there's an external process mining + # blocks on behalf of the miner. + # When disabled and no external block producers are configured, all potential + # block rewards will be missed! + # + # type: bool + # env var: LOTUS_SUBSYSTEMS_DISABLEWINNINGPOST + #DisableWinningPoSt = false + [Dealmaking] # When enabled, the miner can accept online deals @@ -896,3 +929,36 @@ #GCInterval = "1m0s" +[HarmonyDB] + # HOSTS is a list of hostnames to nodes running YugabyteDB + # in a cluster. Only 1 is required + # + # type: []string + # env var: LOTUS_HARMONYDB_HOSTS + #Hosts = ["127.0.0.1"] + + # The Yugabyte server's username with full credentials to operate on Lotus' Database. Blank for default. + # + # type: string + # env var: LOTUS_HARMONYDB_USERNAME + #Username = "yugabyte" + + # The password for the related username. Blank for default. + # + # type: string + # env var: LOTUS_HARMONYDB_PASSWORD + #Password = "yugabyte" + + # The database (logical partition) within Yugabyte. Blank for default. + # + # type: string + # env var: LOTUS_HARMONYDB_DATABASE + #Database = "yugabyte" + + # The port to find Yugabyte. Blank for default. 
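  # For illustration only (these simply mirror the documented defaults above and
  # below; nothing in this patch generates them), a fully uncommented section for
  # a local single-node YugabyteDB would read:
  #   [HarmonyDB]
  #   Hosts = ["127.0.0.1"]
  #   Username = "yugabyte"
  #   Password = "yugabyte"
  #   Database = "yugabyte"
  #   Port = "5433"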
+ # + # type: string + # env var: LOTUS_HARMONYDB_PORT + #Port = "5433" + + diff --git a/documentation/en/default-lotus-provider-config.toml b/documentation/en/default-lotus-provider-config.toml new file mode 100644 index 000000000..91606e503 --- /dev/null +++ b/documentation/en/default-lotus-provider-config.toml @@ -0,0 +1,209 @@ +[Subsystems] + # type: bool + #EnableWindowPost = false + + # type: int + #WindowPostMaxTasks = 0 + + # type: bool + #EnableWinningPost = false + + # type: int + #WinningPostMaxTasks = 0 + + +[Fees] + # type: types.FIL + #DefaultMaxFee = "0.07 FIL" + + # type: types.FIL + #MaxPreCommitGasFee = "0.025 FIL" + + # type: types.FIL + #MaxCommitGasFee = "0.05 FIL" + + # type: types.FIL + #MaxTerminateGasFee = "0.5 FIL" + + # WindowPoSt is a high-value operation, so the default fee should be high. + # + # type: types.FIL + #MaxWindowPoStGasFee = "5 FIL" + + # type: types.FIL + #MaxPublishDealsFee = "0.05 FIL" + + [Fees.MaxPreCommitBatchGasFee] + # type: types.FIL + #Base = "0 FIL" + + # type: types.FIL + #PerSector = "0.02 FIL" + + [Fees.MaxCommitBatchGasFee] + # type: types.FIL + #Base = "0 FIL" + + # type: types.FIL + #PerSector = "0.03 FIL" + + +[Addresses] + # Addresses to send PreCommit messages from + # + # type: []string + #PreCommitControl = [] + + # Addresses to send Commit messages from + # + # type: []string + #CommitControl = [] + + # type: []string + #TerminateControl = [] + + # DisableOwnerFallback disables usage of the owner address for messages + # sent automatically + # + # type: bool + #DisableOwnerFallback = false + + # DisableWorkerFallback disables usage of the worker address for messages + # sent automatically, if control addresses are configured. + # A control address that doesn't have enough funds will still be chosen + # over the worker address if this flag is set. + # + # type: bool + #DisableWorkerFallback = false + + +[Proving] + # Maximum number of sector checks to run in parallel. (0 = unlimited) + # + # WARNING: Setting this value too high may make the node crash by running out of stack + # WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due + # to late submission. + # + # After changing this option, confirm that the new value works in your setup by invoking + # 'lotus-miner proving compute window-post 0' + # + # type: int + #ParallelCheckLimit = 32 + + # Maximum amount of time a proving pre-check can take for a sector. If the check times out the sector will be skipped + # + # WARNING: Setting this value too low risks in sectors being skipped even though they are accessible, just reading the + # test challenge took longer than this timeout + # WARNING: Setting this value too high risks missing PoSt deadline in case IO operations related to this sector are + # blocked (e.g. in case of disconnected NFS mount) + # + # type: Duration + #SingleCheckTimeout = "10m0s" + + # Maximum amount of time a proving pre-check can take for an entire partition. 
If the check times out, sectors in + # the partition which didn't get checked on time will be skipped + # + # WARNING: Setting this value too low risks in sectors being skipped even though they are accessible, just reading the + # test challenge took longer than this timeout + # WARNING: Setting this value too high risks missing PoSt deadline in case IO operations related to this partition are + # blocked or slow + # + # type: Duration + #PartitionCheckTimeout = "20m0s" + + # Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present. + # + # WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need + # to be recovered. Before enabling this option, make sure your PoSt workers work correctly. + # + # After changing this option, confirm that the new value works in your setup by invoking + # 'lotus-miner proving compute window-post 0' + # + # type: bool + #DisableBuiltinWindowPoSt = false + + # Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present. + # + # WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. + # Before enabling this option, make sure your PoSt workers work correctly. + # + # type: bool + #DisableBuiltinWinningPoSt = false + + # Disable WindowPoSt provable sector readability checks. + # + # In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges + # from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as + # we're only interested in checking that sector data can be read. + # + # When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process + # can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by + # the builtin logic not skipping snark computation when some sectors need to be skipped. + # + # When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and + # if challenges for some sectors aren't readable, those sectors will just get skipped. + # + # Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter + # time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should + # be negligible. + # + # NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers. + # + # NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is + # sent to the chain + # + # After changing this option, confirm that the new value works in your setup by invoking + # 'lotus-miner proving compute window-post 0' + # + # type: bool + #DisableWDPoStPreChecks = false + + # Maximum number of partitions to prove in a single SubmitWindowPoSt messace. 0 = network limit (3 in nv21) + # + # A single partition may contain up to 2349 32GiB sectors, or 2300 64GiB sectors. 
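A worked example of the limits quoted above: at the nv21 cap of 3 partitions per message, a single SubmitWindowedPoSt can cover at most 3 * 2349 = 7047 32GiB sectors (roughly 220 TiB), or 3 * 2300 = 6900 64GiB sectors (roughly 431 TiB).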
+ # // + # Note that setting this value lower may result in less efficient gas use - more messages will be sent, + # to prove each deadline, resulting in more total gas use (but each message will have lower gas limit) + # + # Setting this value above the network limit has no effect + # + # type: int + #MaxPartitionsPerPoStMessage = 0 + + # In some cases when submitting DeclareFaultsRecovered messages, + # there may be too many recoveries to fit in a BlockGasLimit. + # In those cases it may be necessary to set this value to something low (eg 1); + # Note that setting this value lower may result in less efficient gas use - more messages will be sent than needed, + # resulting in more total gas use (but each message will have lower gas limit) + # + # type: int + #MaxPartitionsPerRecoveryMessage = 0 + + # Enable single partition per PoSt Message for partitions containing recovery sectors + # + # In cases when submitting PoSt messages which contain recovering sectors, the default network limit may still be + # too high to fit in the block gas limit. In those cases, it becomes useful to only house the single partition + # with recovering sectors in the post message + # + # Note that setting this value lower may result in less efficient gas use - more messages will be sent, + # to prove each deadline, resulting in more total gas use (but each message will have lower gas limit) + # + # type: bool + #SingleRecoveringPartitionPerPostMessage = false + + +[Journal] + # Events of the form: "system1:event1,system1:event2[,...]" + # + # type: string + #DisabledEvents = "" + + +[Apis] + # RPC Secret for the storage subsystem. + # If integrating with lotus-miner this must match the value from + # cat ~/.lotusminer/keystore/MF2XI2BNNJ3XILLQOJUXMYLUMU | jq -r .PrivateKey + # + # type: string + #StorageRPCSecret = "" + diff --git a/go.mod b/go.mod index 08a191ab8..59db622c9 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/filecoin-project/lotus -go 1.19 +go 1.20 retract v1.14.0 // Accidentally force-pushed tag, use v1.14.1+ instead. 
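The hunks below add the database stack that the new harmonydb wrapper appears to build on: jackc/pgx/v5 as the driver and connection pool, georgysavva/scany/v2 for scanning rows into tagged structs, plus samber/lo for small slice helpers. As a rough, self-contained sketch of that row-to-struct pattern (the connection string and the itest_scratch table are placeholders borrowed from the new itests, not something this example creates):

package main

import (
	"context"
	"fmt"

	"github.com/georgysavva/scany/v2/pgxscan"
	"github.com/jackc/pgx/v5/pgxpool"
)

func main() {
	ctx := context.Background()

	// Placeholder DSN matching the documented HarmonyDB defaults
	// (yugabyte/yugabyte on 127.0.0.1:5433); any Postgres-compatible endpoint works.
	pool, err := pgxpool.New(ctx, "postgres://yugabyte:yugabyte@127.0.0.1:5433/yugabyte")
	if err != nil {
		panic(err)
	}
	defer pool.Close()

	// scany maps result columns onto struct fields by `db` tag, the same
	// pattern the new harmonydb Select/StructScan calls rely on.
	var rows []struct {
		SomeInt int    `db:"some_int"`
		Content string `db:"content"`
	}
	if err := pgxscan.Select(ctx, pool, &rows, "SELECT some_int, content FROM itest_scratch"); err != nil {
		panic(err)
	}
	fmt.Println(rows)
}

The harmonydb wrapper layers its own configuration (the [HarmonyDB] section documented above) and the ITestNewID/ITestDeleteAll helpers used by the test ensemble on top of this.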
@@ -62,6 +62,7 @@ require ( github.com/filecoin-project/test-vectors/schema v0.0.7 github.com/gbrlsnchs/jwt/v3 v3.0.1 github.com/gdamore/tcell/v2 v2.2.0 + github.com/georgysavva/scany/v2 v2.0.0 github.com/go-openapi/spec v0.19.11 github.com/golang/mock v1.6.0 github.com/google/uuid v1.3.0 @@ -104,6 +105,8 @@ require ( github.com/ipld/go-ipld-selector-text-lite v0.0.1 github.com/ipni/go-libipni v0.0.8 github.com/ipni/index-provider v0.12.0 + github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa + github.com/jackc/pgx/v5 v5.4.1 github.com/kelseyhightower/envconfig v1.4.0 github.com/koalacxr/quantile v0.0.1 github.com/libp2p/go-buffer-pool v0.1.0 @@ -129,11 +132,14 @@ require ( github.com/multiformats/go-multihash v0.2.3 github.com/multiformats/go-varint v0.0.7 github.com/open-rpc/meta-schema v0.0.0-20201029221707-1b72ef2ea333 + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 + github.com/pkg/errors v0.9.1 github.com/polydawn/refmt v0.89.0 - github.com/prometheus/client_golang v1.14.0 + github.com/prometheus/client_golang v1.16.0 github.com/puzpuzpuz/xsync/v2 v2.4.0 github.com/raulk/clock v1.1.0 github.com/raulk/go-watchdog v1.3.0 + github.com/samber/lo v1.38.1 github.com/stretchr/testify v1.8.4 github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 github.com/triplewz/poseidon v0.0.0-20220525065023-a7cdb0e183e7 @@ -247,6 +253,9 @@ require ( github.com/ipfs/go-verifcid v0.0.2 // indirect github.com/ipld/go-ipld-adl-hamt v0.0.0-20220616142416-9004dbd839e0 // indirect github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect + github.com/jackc/puddle/v2 v2.2.0 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jbenet/go-random v0.0.0-20190219211222-123a90aedc0c // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect @@ -288,13 +297,11 @@ require ( github.com/onsi/ginkgo/v2 v2.11.0 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.42.0 // indirect - github.com/prometheus/procfs v0.9.0 // indirect + github.com/prometheus/procfs v0.10.1 // indirect github.com/prometheus/statsd_exporter v0.22.7 // indirect github.com/quic-go/qpack v0.4.0 // indirect github.com/quic-go/qtls-go1-20 v0.3.3 // indirect @@ -308,7 +315,7 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/tidwall/gjson v1.14.4 // indirect github.com/twmb/murmur3 v1.1.6 // indirect - github.com/ugorji/go/codec v1.2.6 // indirect + github.com/ugorji/go/codec v1.2.11 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.0.1 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect diff --git a/go.sum b/go.sum index 2400a8bfb..a1bbb9473 100644 --- a/go.sum +++ b/go.sum @@ -169,6 +169,7 @@ github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cockroachdb/cockroach-go/v2 v2.2.0 h1:/5znzg5n373N/3ESjHF5SMLxiW4RKB05Ql//KWfeTFs= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA= @@ -393,6 +394,8 @@ github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdk github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg= github.com/gdamore/tcell/v2 v2.2.0 h1:vSyEgKwraXPSOkvCk7IwOSyX+Pv3V2cV9CikJMXg4U4= github.com/gdamore/tcell/v2 v2.2.0/go.mod h1:cTTuF84Dlj/RqmaCIV5p4w8uG1zWdk0SF6oBpwHp4fU= +github.com/georgysavva/scany/v2 v2.0.0 h1:RGXqxDv4row7/FYoK8MRXAZXqoWF/NM+NP0q50k3DKU= +github.com/georgysavva/scany/v2 v2.0.0/go.mod h1:sigOdh+0qb/+aOs3TVhehVT10p8qJL7K/Zhyz8vWo38= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= @@ -450,6 +453,7 @@ github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -855,6 +859,16 @@ github.com/ipni/index-provider v0.12.0 h1:R3F6dxxKNv4XkE4GJZNLOG0bDEbBQ/S5iztXwS github.com/ipni/index-provider v0.12.0/go.mod h1:GhyrADJp7n06fqoc1djzkvL4buZYHzV8SoWrlxEo5F4= github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 h1:QG4CGBqCeuBo6aZlGAamSkxWdgWfZGeE49eUOWJPA4c= github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52/go.mod h1:fdg+/X9Gg4AsAIzWpEHwnqd+QY3b7lajxyjE1m4hkq4= +github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa h1:s+4MhCQ6YrzisK6hFJUX53drDT4UsSW3DEhKn0ifuHw= +github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa/go.mod h1:a/s9Lp5W7n/DD0VrVoyJ00FbP2ytTPDVOivvn2bMlds= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk= +github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.4.1 h1:oKfB/FhuVtit1bBM3zNRRsZ925ZkMN3HXL+LgLUM9lE= +github.com/jackc/pgx/v5 v5.4.1/go.mod h1:q6iHT8uDNXWiFNOlRqJzBTaSH3+2xCXkokxHZC5qWFY= +github.com/jackc/puddle/v2 v2.2.0 h1:RdcDk92EJBuBS55nQMMYFXTxwstHug4jkhT5pq8VxPk= +github.com/jackc/puddle/v2 v2.2.0/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= 
github.com/jackpal/gateway v1.0.5/go.mod h1:lTpwd4ACLXmpyiCTRtfiNyVnUmqT9RivzCDQetPfnjA= github.com/jackpal/go-nat-pmp v1.0.1/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus= @@ -945,6 +959,7 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= +github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= github.com/libp2p/go-addr-util v0.0.1/go.mod h1:4ac6O7n9rIAKB1dnd+s8IbbMXkt+oBpzX4/+RACcnlQ= github.com/libp2p/go-addr-util v0.0.2/go.mod h1:Ecd6Fb3yIuLzq4bD7VcywcVSBtefcAwnUISBM3WG15E= github.com/libp2p/go-buffer-pool v0.0.1/go.mod h1:xtyIz9PMobb13WaxR6Zo1Pd1zXJKYg0a8KiIvDp3TzQ= @@ -1415,8 +1430,8 @@ github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqr github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= github.com/prometheus/client_golang v1.12.2/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= github.com/prometheus/client_golang v1.13.0/go.mod h1:vTeo+zgvILHsnnj/39Ou/1fPN5nJFOEMgftOUOmlvYQ= -github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= -github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -1450,8 +1465,8 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= -github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= -github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= +github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/prometheus/statsd_exporter v0.22.7 h1:7Pji/i2GuhK6Lu7DHrtTkFmNBCudCPT1pX2CziuyQR0= github.com/prometheus/statsd_exporter v0.22.7/go.mod h1:N/TevpjkIh9ccs6nuzY3jQn9dFqnUakOjnEuMPJJJnI= github.com/puzpuzpuz/xsync/v2 v2.4.0 h1:5sXAMHrtx1bg9nbRZTOn8T4MkWe5V+o8yKRH02Eznag= @@ -1486,6 +1501,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod 
h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= +github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sercand/kuberesolver v2.4.0+incompatible h1:WE2OlRf6wjLxHwNkkFLQGaZcVLEXjMjBPjjEU5vksH8= @@ -1596,10 +1613,9 @@ github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= -github.com/ugorji/go v1.2.6/go.mod h1:anCg0y61KIhDlPZmnH+so+RQbysYVyDko0IMgJv0Nn0= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= -github.com/ugorji/go/codec v1.2.6 h1:7kbGefxLoDBuYXOms4yD7223OpNMMPNPZxXk5TvFcyQ= -github.com/ugorji/go/codec v1.2.6/go.mod h1:V6TCNZ4PHqoHGFZuSG1W8nrCzzdgA2DozYxWFFpvxTw= +github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= diff --git a/itests/harmonydb_test.go b/itests/harmonydb_test.go new file mode 100644 index 000000000..8b1b61234 --- /dev/null +++ b/itests/harmonydb_test.go @@ -0,0 +1,174 @@ +package itests + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "testing" + + "github.com/filecoin-project/lotus/itests/kit" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/node/impl" +) + +func withSetup(t *testing.T, f func(*kit.TestMiner)) { + _, miner, _ := kit.EnsembleMinimal(t, + kit.LatestActorsAt(-1), + kit.MockProofs(), + kit.WithSectorIndexDB(), + ) + + f(miner) +} + +func TestCrud(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + withSetup(t, func(miner *kit.TestMiner) { + cdb := miner.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + _, err := cdb.Exec(ctx, ` + INSERT INTO + itest_scratch (some_int, content) + VALUES + (11, 'cows'), + (5, 'cats') + `) + if err != nil { + t.Fatal("Could not insert: ", err) + } + var ints []struct { + Count int `db:"some_int"` + Animal string `db:"content"` + Unpopulated int + } + err = cdb.Select(ctx, &ints, "SELECT content, some_int FROM itest_scratch") + if err != nil { + t.Fatal("Could not select: ", err) + } + if len(ints) != 2 { + t.Fatal("unexpected count of returns. 
Want 2, Got ", len(ints)) + } + if ints[0].Count != 11 || ints[1].Count != 5 { + t.Fatal("expected [11,5] got ", ints) + } + if ints[0].Animal != "cows" || ints[1].Animal != "cats" { + t.Fatal("expected, [cows, cats] ", ints) + } + fmt.Println("test completed") + }) +} + +func TestTransaction(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + withSetup(t, func(miner *kit.TestMiner) { + cdb := miner.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + if _, err := cdb.Exec(ctx, "INSERT INTO itest_scratch (some_int) VALUES (4), (5), (6)"); err != nil { + t.Fatal("E0", err) + } + _, err := cdb.BeginTransaction(ctx, func(tx *harmonydb.Tx) (commit bool, err error) { + if _, err := tx.Exec("INSERT INTO itest_scratch (some_int) VALUES (7), (8), (9)"); err != nil { + t.Fatal("E1", err) + } + + // sum1 is read from OUTSIDE the transaction so it's the old value + var sum1 int + if err := cdb.QueryRow(ctx, "SELECT SUM(some_int) FROM itest_scratch").Scan(&sum1); err != nil { + t.Fatal("E2", err) + } + if sum1 != 4+5+6 { + t.Fatal("Expected 15, got ", sum1) + } + + // sum2 is from INSIDE the transaction, so the updated value. + var sum2 int + if err := tx.QueryRow("SELECT SUM(some_int) FROM itest_scratch").Scan(&sum2); err != nil { + t.Fatal("E3", err) + } + if sum2 != 4+5+6+7+8+9 { + t.Fatal("Expected 39, got ", sum2) + } + return false, nil // rollback + }) + if err != nil { + t.Fatal("ET", err) + } + + var sum2 int + // Query() example (yes, QueryRow would be preferred here) + q, err := cdb.Query(ctx, "SELECT SUM(some_int) FROM itest_scratch") + if err != nil { + t.Fatal("E4", err) + } + defer q.Close() + var rowCt int + for q.Next() { + err := q.Scan(&sum2) + if err != nil { + t.Fatal("error scanning ", err) + } + rowCt++ + } + if sum2 != 4+5+6 { + t.Fatal("Expected 15, got ", sum2) + } + if rowCt != 1 { + t.Fatal("unexpected count of rows") + } + }) +} + +func TestPartialWalk(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + withSetup(t, func(miner *kit.TestMiner) { + cdb := miner.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + if _, err := cdb.Exec(ctx, ` + INSERT INTO + itest_scratch (content, some_int) + VALUES + ('andy was here', 5), + ('lotus is awesome', 6), + ('hello world', 7), + ('3rd integration test', 8), + ('fiddlesticks', 9) + `); err != nil { + t.Fatal("e1", err) + } + + // TASK: FIND THE ID of the string with a specific SHA256 + needle := "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" + q, err := cdb.Query(ctx, `SELECT id, content FROM itest_scratch`) + if err != nil { + t.Fatal("e2", err) + } + defer q.Close() + + var tmp struct { + Src string `db:"content"` + ID int + } + + var done bool + for q.Next() { + + if err := q.StructScan(&tmp); err != nil { + t.Fatal("structscan err " + err.Error()) + } + + bSha := sha256.Sum256([]byte(tmp.Src)) + if hex.EncodeToString(bSha[:]) == needle { + done = true + break + } + } + if !done { + t.Fatal("We didn't find it.") + } + // Answer: tmp.ID + }) +} diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go new file mode 100644 index 000000000..ab54cbef4 --- /dev/null +++ b/itests/harmonytask_test.go @@ -0,0 +1,266 @@ +package itests + +import ( + "context" + "errors" + "fmt" + "sort" + "sync" + "testing" + "time" + + logging "github.com/ipfs/go-log/v2" + "github.com/stretchr/testify/require" + + "github.com/filecoin-project/lotus/itests/kit" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + 
"github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/node/impl" +) + +type task1 struct { + toAdd []int + myPersonalTableLock sync.Mutex + myPersonalTable map[harmonytask.TaskID]int // This would typically be a DB table + WorkCompleted []string +} + +func withDbSetup(t *testing.T, f func(*kit.TestMiner)) { + _, miner, _ := kit.EnsembleMinimal(t, + kit.LatestActorsAt(-1), + kit.MockProofs(), + kit.WithSectorIndexDB(), + ) + logging.SetLogLevel("harmonytask", "debug") + + f(miner) +} + +func (t *task1) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + if !stillOwned() { + return false, errors.New("Why not still owned?") + } + t.myPersonalTableLock.Lock() + defer t.myPersonalTableLock.Unlock() + t.WorkCompleted = append(t.WorkCompleted, fmt.Sprintf("taskResult%d", t.myPersonalTable[tID])) + return true, nil +} +func (t *task1) CanAccept(list []harmonytask.TaskID, e *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + return &list[0], nil +} +func (t *task1) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Max: 100, + Name: "ThingOne", + MaxFailures: 1, + Cost: resources.Resources{ + Cpu: 1, + Ram: 100 << 10, // at 100kb, it's tiny + }, + } +} +func (t *task1) Adder(add harmonytask.AddTaskFunc) { + for _, vTmp := range t.toAdd { + v := vTmp + add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) (bool, error) { + t.myPersonalTableLock.Lock() + defer t.myPersonalTableLock.Unlock() + + t.myPersonalTable[tID] = v + return true, nil + }) + } +} + +func init() { + //logging.SetLogLevel("harmonydb", "debug") + //logging.SetLogLevel("harmonytask", "debug") +} + +func TestHarmonyTasks(t *testing.T) { + //t.Parallel() + withDbSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + t1 := &task1{ + toAdd: []int{56, 73}, + myPersonalTable: map[harmonytask.TaskID]int{}, + } + harmonytask.POLL_DURATION = time.Millisecond * 100 + e, err := harmonytask.New(cdb, []harmonytask.TaskInterface{t1}, "test:1") + require.NoError(t, err) + time.Sleep(time.Second) // do the work. FLAKYNESS RISK HERE. 
+ e.GracefullyTerminate(time.Minute) + expected := []string{"taskResult56", "taskResult73"} + sort.Strings(t1.WorkCompleted) + require.Equal(t, expected, t1.WorkCompleted, "unexpected results") + }) +} + +type passthru struct { + dtl harmonytask.TaskTypeDetails + do func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) + canAccept func(list []harmonytask.TaskID, e *harmonytask.TaskEngine) (*harmonytask.TaskID, error) + adder func(add harmonytask.AddTaskFunc) +} + +func (t *passthru) Do(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + return t.do(tID, stillOwned) +} +func (t *passthru) CanAccept(list []harmonytask.TaskID, e *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + return t.canAccept(list, e) +} +func (t *passthru) TypeDetails() harmonytask.TaskTypeDetails { + return t.dtl +} +func (t *passthru) Adder(add harmonytask.AddTaskFunc) { + if t.adder != nil { + t.adder(add) + } +} + +// Common stuff +var dtl = harmonytask.TaskTypeDetails{Name: "foo", Max: -1, Cost: resources.Resources{}} +var lettersMutex sync.Mutex + +func fooLetterAdder(t *testing.T, cdb *harmonydb.DB) *passthru { + return &passthru{ + dtl: dtl, + canAccept: func(list []harmonytask.TaskID, e *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + return nil, nil + }, + adder: func(add harmonytask.AddTaskFunc) { + for _, vTmp := range []string{"A", "B"} { + v := vTmp + add(func(tID harmonytask.TaskID, tx *harmonydb.Tx) (bool, error) { + _, err := tx.Exec("INSERT INTO itest_scratch (some_int, content) VALUES ($1,$2)", tID, v) + require.NoError(t, err) + return true, nil + }) + } + }, + } +} +func fooLetterSaver(t *testing.T, cdb *harmonydb.DB, dest *[]string) *passthru { + return &passthru{ + dtl: dtl, + canAccept: func(list []harmonytask.TaskID, e *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + return &list[0], nil + }, + do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + var content string + err = cdb.QueryRow(context.Background(), + "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) + require.NoError(t, err) + lettersMutex.Lock() + defer lettersMutex.Unlock() + *dest = append(*dest, content) + return true, nil + }, + } +} + +func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { + //t.Parallel() + withDbSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + senderParty := fooLetterAdder(t, cdb) + var dest []string + workerParty := fooLetterSaver(t, cdb, &dest) + harmonytask.POLL_DURATION = time.Millisecond * 100 + sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") + require.NoError(t, err) + worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{workerParty}, "test:2") + require.NoError(t, err) + time.Sleep(time.Second) // do the work. FLAKYNESS RISK HERE. + sender.GracefullyTerminate(time.Second * 5) + worker.GracefullyTerminate(time.Second * 5) + sort.Strings(dest) + require.Equal(t, []string{"A", "B"}, dest) + }) +} + +func TestWorkStealing(t *testing.T) { + //t.Parallel() + withDbSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + ctx := context.Background() + + // The dead worker will be played by a few SQL INSERTS. 
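		// Concretely: a harmony_machines row (id 300) with a stale last_contact,
		// a harmony_task row (id 1234) still owned by that machine, and an
		// itest_scratch payload 'M'. The engine started below, with
		// CLEANUP_FREQUENCY lowered to 100ms, should treat the stale owner as
		// dead and take the task over, which the final assertion on dest checks.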
+ _, err := cdb.Exec(ctx, `INSERT INTO harmony_machines + (id, last_contact,host_and_port, cpu, ram, gpu) + VALUES (300, DATE '2000-01-01', 'test:1', 4, 400000, 1)`) + require.ErrorIs(t, err, nil) + _, err = cdb.Exec(ctx, `INSERT INTO harmony_task + (id, name, owner_id, posted_time, added_by) + VALUES (1234, 'foo', 300, DATE '2000-01-01', 300)`) + require.ErrorIs(t, err, nil) + _, err = cdb.Exec(ctx, "INSERT INTO itest_scratch (some_int, content) VALUES (1234, 'M')") + require.ErrorIs(t, err, nil) + + harmonytask.POLL_DURATION = time.Millisecond * 100 + harmonytask.CLEANUP_FREQUENCY = time.Millisecond * 100 + var dest []string + worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb, &dest)}, "test:2") + require.ErrorIs(t, err, nil) + time.Sleep(time.Second) // do the work. FLAKYNESS RISK HERE. + worker.GracefullyTerminate(time.Second * 5) + require.Equal(t, []string{"M"}, dest) + }) +} + +func TestTaskRetry(t *testing.T) { + //t.Parallel() + withDbSetup(t, func(m *kit.TestMiner) { + cdb := m.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB + senderParty := fooLetterAdder(t, cdb) + harmonytask.POLL_DURATION = time.Millisecond * 100 + sender, err := harmonytask.New(cdb, []harmonytask.TaskInterface{senderParty}, "test:1") + require.NoError(t, err) + + alreadyFailed := map[string]bool{} + var dest []string + fails2xPerMsg := &passthru{ + dtl: dtl, + canAccept: func(list []harmonytask.TaskID, e *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + return &list[0], nil + }, + do: func(tID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + var content string + err = cdb.QueryRow(context.Background(), + "SELECT content FROM itest_scratch WHERE some_int=$1", tID).Scan(&content) + require.NoError(t, err) + lettersMutex.Lock() + defer lettersMutex.Unlock() + if !alreadyFailed[content] { + alreadyFailed[content] = true + return false, errors.New("intentional 'error'") + } + dest = append(dest, content) + return true, nil + }, + } + rcv, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fails2xPerMsg}, "test:2") + require.NoError(t, err) + time.Sleep(time.Second) + sender.GracefullyTerminate(time.Hour) + rcv.GracefullyTerminate(time.Hour) + sort.Strings(dest) + require.Equal(t, []string{"A", "B"}, dest) + type hist struct { + TaskID int + Result bool + Err string + } + var res []hist + require.NoError(t, cdb.Select(context.Background(), &res, + `SELECT task_id, result, err FROM harmony_task_history + ORDER BY result DESC, task_id`)) + + require.Equal(t, []hist{ + {1, true, ""}, + {2, true, ""}, + {1, false, "error: intentional 'error'"}, + {2, false, "error: intentional 'error'"}}, res) + }) +} diff --git a/itests/kit/ensemble.go b/itests/kit/ensemble.go index 45f21786d..3c83ba896 100644 --- a/itests/kit/ensemble.go +++ b/itests/kit/ensemble.go @@ -49,11 +49,13 @@ import ( "github.com/filecoin-project/lotus/cmd/lotus-worker/sealworker" "github.com/filecoin-project/lotus/gateway" "github.com/filecoin-project/lotus/genesis" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" "github.com/filecoin-project/lotus/markets/idxprov" "github.com/filecoin-project/lotus/markets/idxprov/idxprov_test" lotusminer "github.com/filecoin-project/lotus/miner" "github.com/filecoin-project/lotus/node" "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/node/impl" "github.com/filecoin-project/lotus/node/modules" "github.com/filecoin-project/lotus/node/modules/dtypes" testing2 
"github.com/filecoin-project/lotus/node/modules/testing" @@ -359,6 +361,8 @@ func (n *Ensemble) Start() *Ensemble { n.mn = mocknet.New() } + sharedITestID := harmonydb.ITestNewID() + // --------------------- // FULL NODES // --------------------- @@ -603,6 +607,7 @@ func (n *Ensemble) Start() *Ensemble { cfg.Subsystems.EnableMining = m.options.subsystems.Has(SMining) cfg.Subsystems.EnableSealing = m.options.subsystems.Has(SSealing) cfg.Subsystems.EnableSectorStorage = m.options.subsystems.Has(SSectorStorage) + cfg.Subsystems.EnableSectorIndexDB = m.options.subsystems.Has(SHarmony) cfg.Dealmaking.MaxStagingDealsBytes = m.options.maxStagingDealsBytes if m.options.mainMiner != nil { @@ -724,6 +729,17 @@ func (n *Ensemble) Start() *Ensemble { // upgrades node.Override(new(stmgr.UpgradeSchedule), n.options.upgradeSchedule), + + node.Override(new(harmonydb.ITestID), sharedITestID), + node.Override(new(config.HarmonyDB), func() config.HarmonyDB { + return config.HarmonyDB{ + Hosts: []string{envElse("LOTUS_HARMONYDB_HOSTS", "127.0.0.1")}, + Database: "yugabyte", + Username: "yugabyte", + Password: "yugabyte", + Port: "5433", + } + }), } if m.options.subsystems.Has(SMarkets) { @@ -770,6 +786,12 @@ func (n *Ensemble) Start() *Ensemble { require.NoError(n.t, err) n.t.Cleanup(func() { _ = stop(context.Background()) }) + mCopy := m + n.t.Cleanup(func() { + if mCopy.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB != nil { + mCopy.BaseAPI.(*impl.StorageMinerAPI).HarmonyDB.ITestDeleteAll() + } + }) m.BaseAPI = m.StorageMiner @@ -826,6 +848,8 @@ func (n *Ensemble) Start() *Ensemble { auth := http.Header(nil) + // FUTURE: Use m.MinerNode.(BaseAPI).(impl.StorageMinerAPI).HarmonyDB to setup. + remote := paths.NewRemote(localStore, m.MinerNode, auth, 20, &paths.DefaultPartialFileHandler{}) store := m.options.workerStorageOpt(remote) @@ -855,6 +879,7 @@ func (n *Ensemble) Start() *Ensemble { require.NoError(n.t, err) n.active.workers = append(n.active.workers, m) + } // If we are here, we have processed all inactive workers and moved them @@ -1067,3 +1092,10 @@ func importPreSealMeta(ctx context.Context, meta genesis.Miner, mds dtypes.Metad size := binary.PutUvarint(buf, uint64(maxSectorID)) return mds.Put(ctx, datastore.NewKey(pipeline.StorageCounterDSPrefix), buf[:size]) } + +func envElse(env, els string) string { + if v := os.Getenv(env); v != "" { + return v + } + return els +} diff --git a/itests/kit/node_miner.go b/itests/kit/node_miner.go index 4b81c9df0..ee2ee3eaa 100644 --- a/itests/kit/node_miner.go +++ b/itests/kit/node_miner.go @@ -37,6 +37,8 @@ const ( SSealing SSectorStorage + SHarmony + MinerSubsystems = iota ) diff --git a/itests/kit/node_opts.go b/itests/kit/node_opts.go index 6469c0a30..9af284148 100644 --- a/itests/kit/node_opts.go +++ b/itests/kit/node_opts.go @@ -89,6 +89,13 @@ func WithAllSubsystems() NodeOpt { } } +func WithSectorIndexDB() NodeOpt { + return func(opts *nodeOpts) error { + opts.subsystems = opts.subsystems.Add(SHarmony) + return nil + } +} + func WithSubsystems(systems ...MinerSubsystem) NodeOpt { return func(opts *nodeOpts) error { for _, s := range systems { diff --git a/itests/path_type_filters_test.go b/itests/path_type_filters_test.go index d41e2c215..c668976ac 100644 --- a/itests/path_type_filters_test.go +++ b/itests/path_type_filters_test.go @@ -15,6 +15,7 @@ import ( ) func TestPathTypeFilters(t *testing.T) { + runTest := func(t *testing.T, name string, asserts func(t *testing.T, ctx context.Context, miner *kit.TestMiner, run func())) { t.Run(name, func(t *testing.T) { 
ctx, cancel := context.WithCancel(context.Background()) diff --git a/itests/wdpost_worker_config_test.go b/itests/wdpost_worker_config_test.go index d1672c20f..ca22d82e4 100644 --- a/itests/wdpost_worker_config_test.go +++ b/itests/wdpost_worker_config_test.go @@ -139,7 +139,7 @@ func TestWindowPostNoBuiltinWindowWithWorker(t *testing.T) { t.Log("post message landed") - bm.MineBlocks(ctx, 2*time.Millisecond) + bm.MineBlocksMustPost(ctx, 2*time.Millisecond) waitUntil = di.Open + di.WPoStChallengeWindow*3 t.Logf("End for head.Height > %d", waitUntil) diff --git a/itests/worker_test.go b/itests/worker_test.go index c4f885fb0..31ec40b59 100644 --- a/itests/worker_test.go +++ b/itests/worker_test.go @@ -146,7 +146,7 @@ func TestWindowPostWorker(t *testing.T) { di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) require.NoError(t, err) - bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0] + bm := ens.InterconnectAll().BeginMiningMustPost(2 * time.Millisecond)[0] di = di.NextNotElapsed() @@ -172,7 +172,7 @@ func TestWindowPostWorker(t *testing.T) { t.Log("post message landed") - bm.MineBlocks(ctx, 2*time.Millisecond) + bm.MineBlocksMustPost(ctx, 2*time.Millisecond) waitUntil = di.Open + di.WPoStChallengeWindow*3 t.Logf("End for head.Height > %d", waitUntil) @@ -235,6 +235,8 @@ func TestWindowPostWorker(t *testing.T) { type badWorkerStorage struct { paths.Store + t *testing.T + badsector *uint64 notBadCount int } @@ -242,10 +244,12 @@ type badWorkerStorage struct { func (bs *badWorkerStorage) GenerateSingleVanillaProof(ctx context.Context, minerID abi.ActorID, si storiface.PostSectorChallenge, ppt abi.RegisteredPoStProof) ([]byte, error) { if atomic.LoadUint64(bs.badsector) == uint64(si.SectorNumber) { bs.notBadCount-- + bs.t.Logf("Generating proof for sector %d maybe bad nbc=%d", si.SectorNumber, bs.notBadCount) if bs.notBadCount < 0 { return nil, xerrors.New("no proof for you") } } + bs.t.Logf("Generating proof for sector %d", si.SectorNumber) return bs.Store.GenerateSingleVanillaProof(ctx, minerID, si, ppt) } @@ -268,6 +272,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) { return &badWorkerStorage{ Store: store, badsector: &badsector, + t: t, } }), kit.ConstructorOpts(node.ApplyIf(node.IsType(repo.StorageMiner), @@ -275,6 +280,7 @@ func TestWindowPostWorkerSkipBadSector(t *testing.T) { return &badWorkerStorage{ Store: store, badsector: &badsector, + t: t, notBadCount: 1, } })))) @@ -506,157 +512,6 @@ func TestWorkerName(t *testing.T) { require.True(t, found) } -// Tests that V1_1 proofs on post workers with faults -func TestWindowPostV1P1NV20WorkerFault(t *testing.T) { - kit.QuietMiningLogs() - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - blocktime := 2 * time.Millisecond - - sectors := 2 * 48 * 2 - var badsector uint64 = 100000 - - client, miner, _, ens := kit.EnsembleWorker(t, - kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines - kit.GenesisNetworkVersion(network.Version20), - kit.ConstructorOpts( - node.Override(new(config.ProvingConfig), func() config.ProvingConfig { - c := config.DefaultStorageMiner() - c.Proving.DisableBuiltinWindowPoSt = true - return c.Proving - }), - node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler( - config.DefaultStorageMiner().Fees, - config.ProvingConfig{ - DisableBuiltinWindowPoSt: true, - DisableBuiltinWinningPoSt: false, - DisableWDPoStPreChecks: false, - }, - )), - node.Override(new(paths.Store), func(store 
*paths.Remote) paths.Store { - return &badWorkerStorage{ - Store: store, - badsector: &badsector, - notBadCount: 1, - } - })), - kit.ThroughRPC(), - kit.WithTaskTypes([]sealtasks.TaskType{sealtasks.TTGenerateWindowPoSt}), - kit.WithWorkerStorage(func(store paths.Store) paths.Store { - return &badWorkerStorage{ - Store: store, - badsector: &badsector, - } - })) - - bm := ens.InterconnectAll().BeginMining(blocktime)[0] - - maddr, err := miner.ActorAddress(ctx) - require.NoError(t, err) - - // wait for sectors to be committed - require.Eventually(t, func() bool { - di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) - require.NoError(t, err) - - parts, err := client.StateMinerPartitions(ctx, maddr, di.Index, types.EmptyTSK) - require.NoError(t, err) - - return len(parts) > 1 - }, 30*time.Second, 100*time.Millisecond) - - // Wait until just before a deadline opens - { - di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) - require.NoError(t, err) - - di = di.NextNotElapsed() - - t.Log("Running one proving period") - waitUntil := di.Open + di.WPoStChallengeWindow - di.WPoStChallengeLookback - 1 - client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil)) - - t.Log("Waiting for post message") - bm.Stop() - } - - // Remove one sector in the next deadline (so it's skipped) - { - di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) - require.NoError(t, err) - - parts, err := client.StateMinerPartitions(ctx, maddr, di.Index+1, types.EmptyTSK) - require.NoError(t, err) - require.Greater(t, len(parts), 0) - - secs := parts[0].AllSectors - n, err := secs.Count() - require.NoError(t, err) - require.Equal(t, uint64(2), n) - - // Drop the sector in first partition - sid, err := secs.First() - require.NoError(t, err) - - t.Logf("Drop sector %d; dl %d part %d", sid, di.Index, 0) - - atomic.StoreUint64(&badsector, sid) - require.NoError(t, err) - } - - bm.MineBlocksMustPost(ctx, 2*time.Millisecond) - - mi, err := client.StateMinerInfo(ctx, maddr, types.EmptyTSK) - require.NoError(t, err) - - wact, err := client.StateGetActor(ctx, mi.Worker, types.EmptyTSK) - require.NoError(t, err) - en := wact.Nonce - - // wait for a new message to be sent from worker address, it will be a PoSt - -waitForProof: - for { - //stm: @CHAIN_STATE_GET_ACTOR_001 - wact, err := client.StateGetActor(ctx, mi.Worker, types.EmptyTSK) - require.NoError(t, err) - if wact.Nonce > en { - break waitForProof - } - - build.Clock.Sleep(blocktime) - } - - slm, err := client.StateListMessages(ctx, &api.MessageMatch{To: maddr}, types.EmptyTSK, 0) - require.NoError(t, err) - - pmr, err := client.StateSearchMsg(ctx, types.EmptyTSK, slm[0], -1, false) - require.NoError(t, err) - - nv, err := client.StateNetworkVersion(ctx, pmr.TipSet) - require.NoError(t, err) - require.Equal(t, network.Version20, nv) - - require.True(t, pmr.Receipt.ExitCode.IsSuccess()) - - slmsg, err := client.ChainGetMessage(ctx, slm[0]) - require.NoError(t, err) - - var params miner11.SubmitWindowedPoStParams - require.NoError(t, params.UnmarshalCBOR(bytes.NewBuffer(slmsg.Params))) - require.Equal(t, abi.RegisteredPoStProof_StackedDrgWindow2KiBV1_1, params.Proofs[0].PoStProof) - - require.Len(t, params.Partitions, 2) - sc0, err := params.Partitions[0].Skipped.Count() - require.NoError(t, err) - require.Equal(t, uint64(1), sc0) - sc1, err := params.Partitions[1].Skipped.Count() - require.NoError(t, err) - require.Equal(t, uint64(0), sc1) -} - // Tests that V1_1 proofs on post worker func TestWindowPostV1P1NV20Worker(t 
*testing.T) { kit.QuietMiningLogs() @@ -685,7 +540,7 @@ func TestWindowPostV1P1NV20Worker(t *testing.T) { kit.ThroughRPC(), kit.WithTaskTypes([]sealtasks.TaskType{sealtasks.TTGenerateWindowPoSt})) - ens.InterconnectAll().BeginMining(blocktime) + ens.InterconnectAll().BeginMiningMustPost(blocktime) maddr, err := miner.ActorAddress(ctx) require.NoError(t, err) diff --git a/journal/fsjournal/fs.go b/journal/fsjournal/fs.go index 71aaa95a5..b2eb946fd 100644 --- a/journal/fsjournal/fs.go +++ b/journal/fsjournal/fs.go @@ -37,7 +37,11 @@ type fsJournal struct { // OpenFSJournal constructs a rolling filesystem journal, with a default // per-file size limit of 1GiB. func OpenFSJournal(lr repo.LockedRepo, disabled journal.DisabledEvents) (journal.Journal, error) { - dir := filepath.Join(lr.Path(), "journal") + return OpenFSJournalPath(lr.Path(), disabled) +} + +func OpenFSJournalPath(path string, disabled journal.DisabledEvents) (journal.Journal, error) { + dir := filepath.Join(path, "journal") if err := os.MkdirAll(dir, 0755); err != nil { return nil, fmt.Errorf("failed to mk directory %s for file journal: %w", dir, err) } diff --git a/journal/types.go b/journal/types.go index af21607bf..c32fe37e4 100644 --- a/journal/types.go +++ b/journal/types.go @@ -26,6 +26,9 @@ func ParseDisabledEvents(s string) (DisabledEvents, error) { s = strings.TrimSpace(s) // sanitize evts := strings.Split(s, ",") ret := make(DisabledEvents, 0, len(evts)) + if len(s) == 0 { + return ret, nil + } for _, evt := range evts { evt = strings.TrimSpace(evt) // sanitize s := strings.Split(evt, ":") diff --git a/lib/harmony/harmonydb/doc.go b/lib/harmony/harmonydb/doc.go new file mode 100644 index 000000000..ac60a0260 --- /dev/null +++ b/lib/harmony/harmonydb/doc.go @@ -0,0 +1,35 @@ +/* +# HarmonyDB provides database abstractions over SP-wide Postgres-compatible instance(s). + +# Features + + Rolling to secondary database servers on connection failure + Convenience features for Go + SQL + Prevention of SQL injection vulnerabilities + Monitoring of behavior via Prometheus stats and error logging + +# Usage + +Processes should use New() to instantiate a *DB and keep it. +Consumers can use this *DB concurrently. +Creating and changing tables & views should happen in the ./sql/ folder. +Name the file "today's date" in the format: YYYYMMDD.sql (ex: 20231231.sql for the year's last day) + + a. CREATE TABLE should NOT have a schema: + GOOD: CREATE TABLE foo (); + BAD: CREATE TABLE me.foo (); + b. Schema is managed for you. It provides isolation for integration tests & multi-use. + c. Git Merges: All run once, so old-after-new is OK when there are no deps. + d. NEVER change shipped sql files. Have later files make corrections. + e. Anything not yet run will be run, so an older date making it to master is OK. + +Write SQL with context, raw strings, and args: + + name := "Alice" + var ID int + err := QueryRow(ctx, "SELECT id from people where first_name=?", name).Scan(&ID) + fmt.Println(ID) + +Note: Scan() is column-oriented, while Select() & StructScan() are field name/tag oriented.
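To make the usage notes above concrete, a minimal sketch of opening the pool and issuing queries, using the constructors and query helpers introduced later in this change. It assumes a reachable Postgres-compatible instance, uses pgx-style $1 placeholders, and reads the itest_scratch table shipped under ./sql/; the connection values are only illustrative defaults, and imports of lib/harmony/harmonydb and node/config are elided:

func exampleHarmonyDBUse(ctx context.Context) error {
    // Open the pool once per binary; these values are illustrative defaults.
    db, err := harmonydb.NewFromConfig(config.HarmonyDB{
        Hosts:    []string{"127.0.0.1"},
        Database: "yugabyte",
        Username: "yugabyte",
        Password: "yugabyte",
        Port:     "5433",
    })
    if err != nil {
        return err
    }

    // Single-value reads: column-order Scan via QueryRow.
    var id int
    err = db.QueryRow(ctx, "SELECT id FROM itest_scratch WHERE content=$1", "hello").Scan(&id)
    if err != nil {
        return err
    }

    // Multi-row reads: Select fills a slice of structs by field/tag name.
    var rows []struct {
        Content string
        SomeInt int `db:"some_int"`
    }
    return db.Select(ctx, &rows, "SELECT content, some_int FROM itest_scratch")
}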
+*/ +package harmonydb diff --git a/lib/harmony/harmonydb/harmonydb.go b/lib/harmony/harmonydb/harmonydb.go new file mode 100644 index 000000000..0fed176d2 --- /dev/null +++ b/lib/harmony/harmonydb/harmonydb.go @@ -0,0 +1,297 @@ +package harmonydb + +import ( + "context" + "embed" + "fmt" + "math/rand" + "net" + "regexp" + "sort" + "strconv" + "strings" + "time" + + logging "github.com/ipfs/go-log/v2" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgxpool" + "golang.org/x/xerrors" + + "github.com/filecoin-project/lotus/node/config" +) + +type ITestID string + +// ItestNewID see ITestWithID doc +func ITestNewID() ITestID { + return ITestID(strconv.Itoa(rand.Intn(99999))) +} + +type DB struct { + pgx *pgxpool.Pool + cfg *pgxpool.Config + schema string + hostnames []string +} + +var logger = logging.Logger("harmonydb") + +// NewFromConfig is a convenience function. +// In usage: +// +// db, err := NewFromConfig(config.HarmonyDB) // in binary init +func NewFromConfig(cfg config.HarmonyDB) (*DB, error) { + return New( + cfg.Hosts, + cfg.Username, + cfg.Password, + cfg.Database, + cfg.Port, + "", + ) +} + +func NewFromConfigWithITestID(cfg config.HarmonyDB) func(id ITestID) (*DB, error) { + return func(id ITestID) (*DB, error) { + return New( + cfg.Hosts, + cfg.Username, + cfg.Password, + cfg.Database, + cfg.Port, + id, + ) + } +} + +// New is to be called once per binary to establish the pool. +// log() is for errors. It returns an upgraded database's connection. +// This entry point serves both production and integration tests, so it's more DI. +func New(hosts []string, username, password, database, port string, itestID ITestID) (*DB, error) { + itest := string(itestID) + connString := "" + if len(hosts) > 0 { + connString = "host=" + hosts[0] + " " + } + for k, v := range map[string]string{"user": username, "password": password, "dbname": database, "port": port} { + if strings.TrimSpace(v) != "" { + connString += k + "=" + v + " " + } + } + + schema := "lotus" + if itest != "" { + schema = "itest_" + itest + } + + if err := ensureSchemaExists(connString, schema); err != nil { + return nil, err + } + cfg, err := pgxpool.ParseConfig(connString + "search_path=" + schema) + if err != nil { + return nil, err + } + + // enable multiple fallback hosts. 
+ for _, h := range hosts[1:] { + cfg.ConnConfig.Fallbacks = append(cfg.ConnConfig.Fallbacks, &pgconn.FallbackConfig{Host: h}) + } + + cfg.ConnConfig.OnNotice = func(conn *pgconn.PgConn, n *pgconn.Notice) { + logger.Debug("database notice: " + n.Message + ": " + n.Detail) + DBMeasures.Errors.M(1) + } + + db := DB{cfg: cfg, schema: schema, hostnames: hosts} // pgx populated in AddStatsAndConnect + if err := db.addStatsAndConnect(); err != nil { + return nil, err + } + + return &db, db.upgrade() +} + +type tracer struct { +} + +type ctxkey string + +const SQL_START = ctxkey("sqlStart") +const SQL_STRING = ctxkey("sqlString") + +func (t tracer) TraceQueryStart(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryStartData) context.Context { + return context.WithValue(context.WithValue(ctx, SQL_START, time.Now()), SQL_STRING, data.SQL) +} +func (t tracer) TraceQueryEnd(ctx context.Context, conn *pgx.Conn, data pgx.TraceQueryEndData) { + DBMeasures.Hits.M(1) + ms := time.Since(ctx.Value(SQL_START).(time.Time)).Milliseconds() + DBMeasures.TotalWait.M(ms) + DBMeasures.Waits.Observe(float64(ms)) + if data.Err != nil { + DBMeasures.Errors.M(1) + } + logger.Debugw("SQL run", + "query", ctx.Value(SQL_STRING).(string), + "err", data.Err, + "rowCt", data.CommandTag.RowsAffected(), + "milliseconds", ms) +} + +func (db *DB) GetRoutableIP() (string, error) { + tx, err := db.pgx.Begin(context.Background()) + if err != nil { + return "", err + } + defer func() { _ = tx.Rollback(context.Background()) }() + local := tx.Conn().PgConn().Conn().LocalAddr() + addr, ok := local.(*net.TCPAddr) + if !ok { + return "", fmt.Errorf("could not get local addr from %v", addr) + } + return addr.IP.String(), nil +} + +// addStatsAndConnect connects a prometheus logger. Be sure to run this before using the DB. +func (db *DB) addStatsAndConnect() error { + + db.cfg.ConnConfig.Tracer = tracer{} + + hostnameToIndex := map[string]float64{} + for i, h := range db.hostnames { + hostnameToIndex[h] = float64(i) + } + db.cfg.AfterConnect = func(ctx context.Context, c *pgx.Conn) error { + s := db.pgx.Stat() + DBMeasures.OpenConnections.M(int64(s.TotalConns())) + DBMeasures.WhichHost.Observe(hostnameToIndex[c.Config().Host]) + + //FUTURE place for any connection seasoning + return nil + } + + // Timeout the first connection so we know if the DB is down. + ctx, ctxClose := context.WithDeadline(context.Background(), time.Now().Add(5*time.Second)) + defer ctxClose() + var err error + db.pgx, err = pgxpool.NewWithConfig(ctx, db.cfg) + if err != nil { + logger.Error(fmt.Sprintf("Unable to connect to database: %v\n", err)) + return err + } + return nil +} + +// ITestDeleteAll will delete everything created for "this" integration test. +// This must be called at the end of each integration test. +func (db *DB) ITestDeleteAll() { + if !strings.HasPrefix(db.schema, "itest_") { + fmt.Println("Warning: this should never be called on anything but an itest schema.") + return + } + defer db.pgx.Close() + _, err := db.pgx.Exec(context.Background(), "DROP SCHEMA "+db.schema+" CASCADE") + if err != nil { + fmt.Println("warning: unclean itest shutdown: cannot delete schema: " + err.Error()) + return + } +} + +var schemaREString = "^[A-Za-z0-9_]+$" +var schemaRE = regexp.MustCompile(schemaREString) + +func ensureSchemaExists(connString, schema string) error { + // FUTURE allow using fallback DBs for start-up. 
+ ctx, cncl := context.WithDeadline(context.Background(), time.Now().Add(3*time.Second)) + p, err := pgx.Connect(ctx, connString) + defer cncl() + if err != nil { + return xerrors.Errorf("unable to connect to db: %s, err: %v", connString, err) + } + defer func() { _ = p.Close(context.Background()) }() + + if len(schema) < 5 || !schemaRE.MatchString(schema) { + return xerrors.New("schema must be of the form " + schemaREString + "\n Got: " + schema) + } + _, err = p.Exec(context.Background(), "CREATE SCHEMA IF NOT EXISTS "+schema) + if err != nil { + return xerrors.Errorf("cannot create schema: %w", err) + } + return nil +} + +//go:embed sql +var fs embed.FS + +func (db *DB) upgrade() error { + // Does the version table exist? if not, make it. + // NOTE: This cannot change except via the next sql file. + _, err := db.Exec(context.Background(), `CREATE TABLE IF NOT EXISTS base ( + id SERIAL PRIMARY KEY, + entry CHAR(12), + applied TIMESTAMP DEFAULT current_timestamp + )`) + if err != nil { + logger.Error("Upgrade failed.") + return xerrors.Errorf("Cannot create base table %w", err) + } + + // __Run scripts in order.__ + + landed := map[string]bool{} + { + var landedEntries []struct{ Entry string } + err = db.Select(context.Background(), &landedEntries, "SELECT entry FROM base") + if err != nil { + logger.Error("Cannot read entries: " + err.Error()) + return xerrors.Errorf("cannot read entries: %w", err) + } + for _, l := range landedEntries { + landed[l.Entry[:8]] = true + } + } + dir, err := fs.ReadDir("sql") + if err != nil { + logger.Error("Cannot read fs entries: " + err.Error()) + return err + } + sort.Slice(dir, func(i, j int) bool { return dir[i].Name() < dir[j].Name() }) + + if len(dir) == 0 { + logger.Error("No sql files found.") + } + for _, e := range dir { + name := e.Name() + if !strings.HasSuffix(name, ".sql") { + logger.Debug("Must have only SQL files here, found: " + name) + continue + } + if landed[name[:8]] { + logger.Debug("DB Schema " + name + " already applied.") + continue + } + file, err := fs.ReadFile("sql/" + name) + if err != nil { + logger.Error("weird embed file read err") + return err + } + for _, s := range strings.Split(string(file), ";") { // Implement the changes. + if len(strings.TrimSpace(s)) == 0 { + continue + } + _, err = db.pgx.Exec(context.Background(), s) + if err != nil { + msg := fmt.Sprintf("Could not upgrade! File %s, Query: %s, Returned: %s", name, s, err.Error()) + logger.Error(msg) + return xerrors.New(msg) // makes devs lives easier by placing message at the end. + } + } + + // Mark Completed. + _, err = db.Exec(context.Background(), "INSERT INTO base (entry) VALUES ($1)", name[:8]) + if err != nil { + logger.Error("Cannot update base: " + err.Error()) + return xerrors.Errorf("cannot insert into base: %w", err) + } + } + return nil +} diff --git a/lib/harmony/harmonydb/metrics.go b/lib/harmony/harmonydb/metrics.go new file mode 100644 index 000000000..b29a76ad5 --- /dev/null +++ b/lib/harmony/harmonydb/metrics.go @@ -0,0 +1,77 @@ +package harmonydb + +import ( + "github.com/prometheus/client_golang/prometheus" + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" + + "github.com/filecoin-project/lotus/metrics" +) + +var ( + dbTag, _ = tag.NewKey("db_name") + pre = "harmonydb_base_" + waitsBuckets = []float64{0, 10, 20, 30, 50, 80, 130, 210, 340, 550, 890} + whichHostBuckets = []float64{0, 1, 2, 3, 4, 5} +) + +// DBMeasures groups all db metrics. 
+var DBMeasures = struct { + Hits *stats.Int64Measure + TotalWait *stats.Int64Measure + Waits prometheus.Histogram + OpenConnections *stats.Int64Measure + Errors *stats.Int64Measure + WhichHost prometheus.Histogram +}{ + Hits: stats.Int64(pre+"hits", "Total number of uses.", stats.UnitDimensionless), + TotalWait: stats.Int64(pre+"total_wait", "Total delay. A numerator over hits to get average wait.", stats.UnitMilliseconds), + Waits: prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: pre + "waits", + Buckets: waitsBuckets, + Help: "The histogram of waits for query completions.", + }), + OpenConnections: stats.Int64(pre+"open_connections", "Total connection count.", stats.UnitDimensionless), + Errors: stats.Int64(pre+"errors", "Total error count.", stats.UnitDimensionless), + WhichHost: prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: pre + "which_host", + Buckets: whichHostBuckets, + Help: "The index of the hostname being used", + }), +} + +// CacheViews groups all cache-related default views. +func init() { + metrics.RegisterViews( + &view.View{ + Measure: DBMeasures.Hits, + Aggregation: view.Sum(), + TagKeys: []tag.Key{dbTag}, + }, + &view.View{ + Measure: DBMeasures.TotalWait, + Aggregation: view.Sum(), + TagKeys: []tag.Key{dbTag}, + }, + &view.View{ + Measure: DBMeasures.OpenConnections, + Aggregation: view.LastValue(), + TagKeys: []tag.Key{dbTag}, + }, + &view.View{ + Measure: DBMeasures.Errors, + Aggregation: view.Sum(), + TagKeys: []tag.Key{dbTag}, + }, + ) + err := prometheus.Register(DBMeasures.Waits) + if err != nil { + panic(err) + } + + err = prometheus.Register(DBMeasures.WhichHost) + if err != nil { + panic(err) + } +} diff --git a/lib/harmony/harmonydb/sql/20230706.sql b/lib/harmony/harmonydb/sql/20230706.sql new file mode 100644 index 000000000..a4a333b81 --- /dev/null +++ b/lib/harmony/harmonydb/sql/20230706.sql @@ -0,0 +1,7 @@ +CREATE TABLE itest_scratch ( + id SERIAL PRIMARY KEY, + content TEXT, + some_int INTEGER, + second_int INTEGER, + update_time TIMESTAMP DEFAULT current_timestamp +) \ No newline at end of file diff --git a/lib/harmony/harmonydb/sql/20230712.sql b/lib/harmony/harmonydb/sql/20230712.sql new file mode 100644 index 000000000..750884c8c --- /dev/null +++ b/lib/harmony/harmonydb/sql/20230712.sql @@ -0,0 +1,45 @@ +create table sector_location +( + miner_id bigint not null, + sector_num bigint not null, + sector_filetype int not null, + storage_id varchar not null, + is_primary bool, + read_ts timestamp(6), + read_refs int, + write_ts timestamp(6), + write_lock_owner varchar, + constraint sectorlocation_pk + primary key (miner_id, sector_num, sector_filetype, storage_id) +); + +alter table sector_location + alter column read_refs set not null; + +alter table sector_location + alter column read_refs set default 0; + +create table storage_path +( + "storage_id" varchar not null + constraint "storage_path_pkey" + primary key, + "urls" varchar, -- comma separated list of urls + "weight" bigint, + "max_storage" bigint, + "can_seal" bool, + "can_store" bool, + "groups" varchar, -- comma separated list of group names + "allow_to" varchar, -- comma separated list of allowed groups + "allow_types" varchar, -- comma separated list of allowed file types + "deny_types" varchar, -- comma separated list of denied file types + + "capacity" bigint, + "available" bigint, + "fs_available" bigint, + "reserved" bigint, + "used" bigint, + "last_heartbeat" timestamp(6), + "heartbeat_err" varchar +); + diff --git a/lib/harmony/harmonydb/sql/20230719.sql 
b/lib/harmony/harmonydb/sql/20230719.sql new file mode 100644 index 000000000..e7b1795c5 --- /dev/null +++ b/lib/harmony/harmonydb/sql/20230719.sql @@ -0,0 +1,51 @@ +/* For HarmonyTask base implementation. */ + +CREATE TABLE harmony_machines ( + id SERIAL PRIMARY KEY NOT NULL, + last_contact TIMESTAMP NOT NULL DEFAULT current_timestamp, + host_and_port varchar(300) NOT NULL, + cpu INTEGER NOT NULL, + ram BIGINT NOT NULL, + gpu FLOAT NOT NULL +); + +CREATE TABLE harmony_task ( + id SERIAL PRIMARY KEY NOT NULL, + initiated_by INTEGER, + update_time TIMESTAMP NOT NULL DEFAULT current_timestamp, + posted_time TIMESTAMP NOT NULL, + owner_id INTEGER REFERENCES harmony_machines (id) ON DELETE SET NULL, + added_by INTEGER NOT NULL, + previous_task INTEGER, + name varchar(16) NOT NULL +); +COMMENT ON COLUMN harmony_task.initiated_by IS 'The task ID whose completion occasioned this task.'; +COMMENT ON COLUMN harmony_task.owner_id IS 'The foreign key to harmony_machines.'; +COMMENT ON COLUMN harmony_task.name IS 'The name of the task type.'; +COMMENT ON COLUMN harmony_task.owner_id IS 'may be null if between owners or not yet taken'; +COMMENT ON COLUMN harmony_task.update_time IS 'When it was last modified. not a heartbeat'; + +CREATE TABLE harmony_task_history ( + id SERIAL PRIMARY KEY NOT NULL, + task_id INTEGER NOT NULL, + name VARCHAR(16) NOT NULL, + posted TIMESTAMP NOT NULL, + work_start TIMESTAMP NOT NULL, + work_end TIMESTAMP NOT NULL, + result BOOLEAN NOT NULL, + err varchar +); +COMMENT ON COLUMN harmony_task_history.result IS 'Use to detemine if this was a successful run.'; + +CREATE TABLE harmony_task_follow ( + id SERIAL PRIMARY KEY NOT NULL, + owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, + to_type VARCHAR(16) NOT NULL, + from_type VARCHAR(16) NOT NULL +); + +CREATE TABLE harmony_task_impl ( + id SERIAL PRIMARY KEY NOT NULL, + owner_id INTEGER NOT NULL REFERENCES harmony_machines (id) ON DELETE CASCADE, + name VARCHAR(16) NOT NULL +); \ No newline at end of file diff --git a/lib/harmony/harmonydb/sql/20230823.sql b/lib/harmony/harmonydb/sql/20230823.sql new file mode 100644 index 000000000..c6f993d76 --- /dev/null +++ b/lib/harmony/harmonydb/sql/20230823.sql @@ -0,0 +1,48 @@ +create table wdpost_partition_tasks +( + task_id bigint not null + constraint wdpost_partition_tasks_pk + primary key, + sp_id bigint not null, + proving_period_start bigint not null, + deadline_index bigint not null, + partition_index bigint not null, + constraint wdpost_partition_tasks_identity_key + unique (sp_id, proving_period_start, deadline_index, partition_index) +); + +comment on column wdpost_partition_tasks.task_id is 'harmonytask task ID'; +comment on column wdpost_partition_tasks.sp_id is 'storage provider ID'; +comment on column wdpost_partition_tasks.proving_period_start is 'proving period start'; +comment on column wdpost_partition_tasks.deadline_index is 'deadline index within the proving period'; +comment on column wdpost_partition_tasks.partition_index is 'partition index within the deadline'; + +create table wdpost_proofs +( + sp_id bigint not null, + proving_period_start bigint not null, + deadline bigint not null, + partition bigint not null, + submit_at_epoch bigint not null, + submit_by_epoch bigint not null, + proof_params bytea, + + submit_task_id bigint, + message_cid text, + + constraint wdpost_proofs_identity_key + unique (sp_id, proving_period_start, deadline, partition) +); + +create table wdpost_recovery_tasks +( + task_id bigint not null + constraint 
wdpost_recovery_tasks_pk + primary key, + sp_id bigint not null, + proving_period_start bigint not null, + deadline_index bigint not null, + partition_index bigint not null, + constraint wdpost_recovery_tasks_identity_key + unique (sp_id, proving_period_start, deadline_index, partition_index) +); \ No newline at end of file diff --git a/lib/harmony/harmonydb/sql/20230919.sql b/lib/harmony/harmonydb/sql/20230919.sql new file mode 100644 index 000000000..84699a0d5 --- /dev/null +++ b/lib/harmony/harmonydb/sql/20230919.sql @@ -0,0 +1,5 @@ +CREATE TABLE harmony_config ( + id SERIAL PRIMARY KEY NOT NULL, + title VARCHAR(300) UNIQUE NOT NULL, + config TEXT NOT NULL +); \ No newline at end of file diff --git a/lib/harmony/harmonydb/sql/20231103.sql b/lib/harmony/harmonydb/sql/20231103.sql new file mode 100644 index 000000000..e70cf3738 --- /dev/null +++ b/lib/harmony/harmonydb/sql/20231103.sql @@ -0,0 +1,55 @@ +create table message_sends +( + from_key text not null, + to_addr text not null, + send_reason text not null, + send_task_id bigint not null, + + unsigned_data bytea not null, + unsigned_cid text not null, + + nonce bigint, + signed_data bytea, + signed_json jsonb, + signed_cid text, + + send_time timestamp default null, + send_success boolean default null, + send_error text, + + constraint message_sends_pk + primary key (send_task_id, from_key) +); + +comment on column message_sends.from_key is 'text f[1/3/4]... address'; +comment on column message_sends.to_addr is 'text f[0/1/2/3/4]... address'; +comment on column message_sends.send_reason is 'optional description of send reason'; +comment on column message_sends.send_task_id is 'harmony task id of the send task'; + +comment on column message_sends.unsigned_data is 'unsigned message data'; +comment on column message_sends.unsigned_cid is 'unsigned message cid'; + +comment on column message_sends.nonce is 'assigned message nonce, set while the send task is executing'; +comment on column message_sends.signed_data is 'signed message data, set while the send task is executing'; +comment on column message_sends.signed_cid is 'signed message cid, set while the send task is executing'; + +comment on column message_sends.send_time is 'time when the send task was executed, set after pushing the message to the network'; +comment on column message_sends.send_success is 'whether this message was broadcasted to the network already, null if not yet attempted, true if successful, false if failed'; +comment on column message_sends.send_error is 'error message if send_success is false'; + +create unique index message_sends_success_index + on message_sends (from_key, nonce) + where send_success is not false; + +comment on index message_sends_success_index is +'message_sends_success_index enforces sender/nonce uniqueness, it is a conditional index that only indexes rows where send_success is not false. 
This allows us to have multiple rows with the same sender/nonce, as long as only one of them was successfully broadcasted (true) to the network or is in the process of being broadcasted (null).'; + +create table message_send_locks +( + from_key text not null, + task_id bigint not null, + claimed_at timestamp not null, + + constraint message_send_locks_pk + primary key (from_key) +); diff --git a/lib/harmony/harmonydb/sql/20231110.sql b/lib/harmony/harmonydb/sql/20231110.sql new file mode 100644 index 000000000..15b478f4d --- /dev/null +++ b/lib/harmony/harmonydb/sql/20231110.sql @@ -0,0 +1,39 @@ +create table mining_tasks +( + task_id bigint not null + constraint mining_tasks_pk + primary key, + sp_id bigint not null, + epoch bigint not null, + base_compute_time timestamp not null, + + won bool not null default false, + mined_cid text, + mined_header jsonb, + mined_at timestamp, + + submitted_at timestamp, + + constraint mining_tasks_sp_epoch + unique (sp_id, epoch) +); + +create table mining_base_block +( + id bigserial not null + constraint mining_base_block_pk + primary key, + task_id bigint not null + constraint mining_base_block_mining_tasks_task_id_fk + references mining_tasks + on delete cascade, + sp_id bigint, + block_cid text not null, + + no_win bool not null default false, + + constraint mining_base_block_pk2 + unique (sp_id, task_id, block_cid) +); + +CREATE UNIQUE INDEX mining_base_block_cid_k ON mining_base_block (sp_id, block_cid) WHERE no_win = false; diff --git a/lib/harmony/harmonydb/sql/20231113.sql b/lib/harmony/harmonydb/sql/20231113.sql new file mode 100644 index 000000000..7a71d98ae --- /dev/null +++ b/lib/harmony/harmonydb/sql/20231113.sql @@ -0,0 +1 @@ +ALTER TABLE harmony_task_history ADD COLUMN completed_by_host_and_port varchar(300) NOT NULL; \ No newline at end of file diff --git a/lib/harmony/harmonydb/sql/20231120-testing1.sql b/lib/harmony/harmonydb/sql/20231120-testing1.sql new file mode 100644 index 000000000..71daaef69 --- /dev/null +++ b/lib/harmony/harmonydb/sql/20231120-testing1.sql @@ -0,0 +1,8 @@ +CREATE TABLE harmony_test ( + task_id bigint + constraint harmony_test_pk + primary key, + options text, + result text +); +ALTER TABLE wdpost_proofs ADD COLUMN test_task_id bigint; \ No newline at end of file diff --git a/lib/harmony/harmonydb/userfuncs.go b/lib/harmony/harmonydb/userfuncs.go new file mode 100644 index 000000000..788ca4a34 --- /dev/null +++ b/lib/harmony/harmonydb/userfuncs.go @@ -0,0 +1,158 @@ +package harmonydb + +import ( + "context" + "errors" + + "github.com/georgysavva/scany/v2/pgxscan" + "github.com/jackc/pgerrcode" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +// rawStringOnly is _intentionally_private_ to force only basic strings in SQL queries. +// In any package, raw strings will satisfy compilation. Ex: +// +// harmonydb.Exec("INSERT INTO version (number) VALUES (1)") +// +// This prevents SQL injection attacks where the input contains query fragments. +type rawStringOnly string + +// Exec executes changes (INSERT, DELETE, or UPDATE). +// Note, for CREATE & DROP please keep these permanent and express +// them in the ./sql/ files (next number). +func (db *DB) Exec(ctx context.Context, sql rawStringOnly, arguments ...any) (count int, err error) { + res, err := db.pgx.Exec(ctx, string(sql), arguments...) 
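As a concrete illustration of the injection guard described above: the SQL text must be written as a plain string literal (anything assembled at runtime has type string and will not convert to the unexported rawStringOnly), while user-supplied values travel separately as bound parameters. The userInput variable is hypothetical; itest_scratch is the scratch table from ./sql/:

// Compiles: literal query text, values bound as parameters.
ct, err := db.Exec(ctx, "UPDATE itest_scratch SET content=$1 WHERE id=$2", userInput, 7)
if err != nil {
    return err
}
fmt.Println("rows affected:", ct)

// Rejected by the compiler: a string built at runtime cannot be passed.
// q := "UPDATE itest_scratch SET content='" + userInput + "'"
// _, _ = db.Exec(ctx, q) // compile error: q (type string) is not a rawStringOnly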
+ return int(res.RowsAffected()), err +} + +type Qry interface { + Next() bool + Err() error + Close() + Scan(...any) error + Values() ([]any, error) +} + +// Query offers Next/Err/Close/Scan/Values/StructScan +type Query struct { + Qry +} + +// Query allows iterating returned values to save memory consumption +// with the downside of needing to `defer q.Close()`. For a simpler interface, +// try Select() +// Next() must be called to advance the row cursor, including the first time: +// Ex: +// q, err := db.Query(ctx, "SELECT id, name FROM users") +// handleError(err) +// defer q.Close() +// +// for q.Next() { +// var id int +// var name string +// handleError(q.Scan(&id, &name)) +// fmt.Println(id, name) +// } +func (db *DB) Query(ctx context.Context, sql rawStringOnly, arguments ...any) (*Query, error) { + q, err := db.pgx.Query(ctx, string(sql), arguments...) + return &Query{q}, err +} +func (q *Query) StructScan(s any) error { + return pgxscan.ScanRow(s, q.Qry.(pgx.Rows)) +} + +type Row interface { + Scan(...any) error +} + +// QueryRow gets 1 row using column order matching. +// This is a timesaver for the special case of wanting the first row returned only. +// EX: +// +// var name, pet string +// var ID = 123 +// err := db.QueryRow(ctx, "SELECT name, pet FROM users WHERE ID=?", ID).Scan(&name, &pet) +func (db *DB) QueryRow(ctx context.Context, sql rawStringOnly, arguments ...any) Row { + return db.pgx.QueryRow(ctx, string(sql), arguments...) +} + +/* +Select multiple rows into a slice using name matching +Ex: + + type user struct { + Name string + ID int + Number string `db:"tel_no"` + } + + var users []user + pet := "cat" + err := db.Select(ctx, &users, "SELECT name, id, tel_no FROM customers WHERE pet=?", pet) +*/ +func (db *DB) Select(ctx context.Context, sliceOfStructPtr any, sql rawStringOnly, arguments ...any) error { + return pgxscan.Select(ctx, db.pgx, sliceOfStructPtr, string(sql), arguments...) +} + +type Tx struct { + pgx.Tx + ctx context.Context +} + +// BeginTransaction is how you can access transactions using this library. +// The entire transaction happens in the function passed in. +// The return must be true or a rollback will occur. +func (db *DB) BeginTransaction(ctx context.Context, f func(*Tx) (commit bool, err error)) (didCommit bool, retErr error) { + tx, err := db.pgx.BeginTx(ctx, pgx.TxOptions{}) + if err != nil { + return false, err + } + var commit bool + defer func() { // Panic clean-up. + if !commit { + if tmp := tx.Rollback(ctx); tmp != nil { + retErr = tmp + } + } + }() + commit, err = f(&Tx{tx, ctx}) + if err != nil { + return false, err + } + if commit { + err = tx.Commit(ctx) + if err != nil { + return false, err + } + return true, nil + } + return false, nil +} + +// Exec in a transaction. +func (t *Tx) Exec(sql rawStringOnly, arguments ...any) (count int, err error) { + res, err := t.Tx.Exec(t.ctx, string(sql), arguments...) + return int(res.RowsAffected()), err +} + +// Query in a transaction. +func (t *Tx) Query(sql rawStringOnly, arguments ...any) (*Query, error) { + q, err := t.Tx.Query(t.ctx, string(sql), arguments...) + return &Query{q}, err +} + +// QueryRow in a transaction. +func (t *Tx) QueryRow(sql rawStringOnly, arguments ...any) Row { + return t.Tx.QueryRow(t.ctx, string(sql), arguments...) +} + +// Select in a transaction. +func (t *Tx) Select(sliceOfStructPtr any, sql rawStringOnly, arguments ...any) error { + return pgxscan.Select(t.ctx, t.Tx, sliceOfStructPtr, string(sql), arguments...) 
+} + +func IsErrUniqueContraint(err error) bool { + var e2 *pgconn.PgError + return errors.As(err, &e2) && e2.Code == pgerrcode.UniqueViolation +} diff --git a/lib/harmony/harmonytask/doc.go b/lib/harmony/harmonytask/doc.go new file mode 100644 index 000000000..f9e5a9898 --- /dev/null +++ b/lib/harmony/harmonytask/doc.go @@ -0,0 +1,71 @@ +/* +Package harmonytask implements a pure (no task logic), distributed +task manager. This clean interface allows a task implementer to completely +avoid being concerned with task scheduling and management. +It's based on the idea of tasks as small units of work broken from other +work by hardware, parallelizability, reliability, or any other reason. +Workers will be Greedy: vacuuming up their favorite jobs from a list. +Once 1 task is accepted, harmonydb tries to get other task runner +machines to accept work (round robin) before trying again to accept. +* +Mental Model: + + Things that block tasks: + - task not registered for any running server + - max was specified and reached + - resource exhaustion + - CanAccept() interface (per-task implementation) does not accept it. + Ways tasks start: + - DB Read every 3 seconds + - Task was added (to db) by this process + Ways tasks get added: + - Async Listener task (for chain, etc) + - Followers: Tasks get added because another task completed + When Follower collectors run: + - If both sides are process-local, then this process will pick it up. + - If properly registered already, the http endpoint will be tried to start it. + - Otherwise, at the listen interval during db scrape it will be found. + How duplicate tasks are avoided: + - that's up to the task definition, but probably a unique key + +* +To use: +1. Implement TaskInterface for a new task. +2. Have New() receive this & all other ACTIVE implementations. +* +* +As we are not expecting DBAs in this database, it's important to know +what grows uncontrolled. The only growing harmony_* table is +harmony_task_history (somewhat quickly). These will need a +clean-up after the task data can no longer be acted upon. +But the design **requires** extraInfo tables to grow until the task's +info could not possibly be used by a following task, including slow +release rollout. This would normally be in the order of months old. +* +Other possible enhancements include more collaborative coordination +to assign a task to machines closer to the data. + +__Database_Behavior__ +harmony_task is the list of work that has not been completed. + + AddTaskFunc manages the additions, but is designed to have its + transactions failed-out on overlap with a similar task already written. + It's up to the TaskInterface implementer to discover this overlap via + some other table it uses (since overlap can mean very different things). + +harmony_task_history + + This holds the record of tasks that completed or saw too many retries. It also + serves as input for subsequent (follower) tasks to kick off. This is not + done machine-internally because a follower may not be on the same machine + as the previous task. + +harmony_machines + + Managed by lib/harmony/resources, this is a reference to machines registered + via the resources. This registration does not obligate the machine to + anything, but serves as a discovery mechanism. Paths are hostnames + ports + which are presumed to support http, but this assumption is only used by + the task system.
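To make the two "To use" steps concrete, a compressed sketch of one task implementation and its wiring. The task name, the cleanup_params detail table, and the resource numbers are invented for illustration; the interface methods, TaskTypeDetails fields, and constructors are the ones declared in this package:

package exampletasks // illustrative only

import (
    "time"

    "github.com/filecoin-project/lotus/lib/harmony/harmonydb"
    "github.com/filecoin-project/lotus/lib/harmony/harmonytask"
    "github.com/filecoin-project/lotus/lib/harmony/resources"
)

type CleanupTask struct{ db *harmonydb.DB }

func (c *CleanupTask) TypeDetails() harmonytask.TaskTypeDetails {
    return harmonytask.TaskTypeDetails{
        Name:        "Cleanup", // must fit the varchar(16) name column
        Max:         2,         // at most 2 in flight on this machine
        MaxFailures: 3,
        Cost:        resources.Resources{Cpu: 1, Ram: 64 << 20},
    }
}

func (c *CleanupTask) CanAccept(ids []harmonytask.TaskID, _ *harmonytask.TaskEngine) (*harmonytask.TaskID, error) {
    if len(ids) == 0 {
        return nil, nil
    }
    return &ids[0], nil // take the first offered task
}

func (c *CleanupTask) Do(id harmonytask.TaskID, stillOwned func() bool) (done bool, err error) {
    if !stillOwned() {
        return false, nil // ownership lost; let the engine reschedule
    }
    // ... perform the work recorded for this id ...
    return true, nil
}

func (c *CleanupTask) Adder(add harmonytask.AddTaskFunc) {
    for range time.Tick(time.Hour) { // stand-in for an external trigger
        add(func(id harmonytask.TaskID, tx *harmonydb.Tx) (bool, error) {
            _, err := tx.Exec("INSERT INTO cleanup_params (task_id) VALUES ($1)", id)
            return err == nil, err // unique-constraint conflicts mean "already added"
        })
    }
}

// Wiring, once per process:
//   engine, err := harmonytask.New(db, []harmonytask.TaskInterface{&CleanupTask{db: db}}, "host:port")
//   defer engine.GracefullyTerminate(5 * time.Second)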
+*/ +package harmonytask diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go new file mode 100644 index 000000000..595e5b63a --- /dev/null +++ b/lib/harmony/harmonytask/harmonytask.go @@ -0,0 +1,302 @@ +package harmonytask + +import ( + "context" + "fmt" + "strconv" + "sync/atomic" + "time" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/resources" +) + +// Consts (except for unit test) +var POLL_DURATION = time.Second * 3 // Poll for Work this frequently +var CLEANUP_FREQUENCY = 5 * time.Minute // Check for dead workers this often * everyone +var FOLLOW_FREQUENCY = 1 * time.Minute // Check for work to follow this often + +type TaskTypeDetails struct { + // Max returns how many tasks this machine can run of this type. + // Zero (default) or less means unrestricted. + Max int + + // Name is the task name to be added to the task list. + Name string + + // Peak costs to Do() the task. + Cost resources.Resources + + // Max Failure count before the job is dropped. + // 0 = retry forever + MaxFailures uint + + // Follow another task's completion via this task's creation. + // The function should populate extraInfo from data + // available from the previous task's tables, using the given TaskID. + // It should also return success if the trigger succeeded. + // NOTE: if refatoring tasks, see if your task is + // necessary. Ex: Is the sector state correct for your stage to run? + Follows map[string]func(TaskID, AddTaskFunc) (bool, error) +} + +// TaskInterface must be implemented in order to have a task used by harmonytask. +type TaskInterface interface { + // Do the task assigned. Call stillOwned before making single-writer-only + // changes to ensure the work has not been stolen. + // This is the ONLY function that should attempt to do the work, and must + // ONLY be called by harmonytask. + // Indicate if the task no-longer needs scheduling with done=true including + // cases where it's past the deadline. + Do(taskID TaskID, stillOwned func() bool) (done bool, err error) + + // CanAccept should return if the task can run on this machine. It should + // return null if the task type is not allowed on this machine. + // It should select the task it most wants to accomplish. + // It is also responsible for determining & reserving disk space (including scratch). + CanAccept([]TaskID, *TaskEngine) (*TaskID, error) + + // TypeDetails() returns static details about how this task behaves and + // how this machine will run it. Read once at the beginning. + TypeDetails() TaskTypeDetails + + // This listener will consume all external sources continuously for work. + // Do() may also be called from a backlog of work. This must not + // start doing the work (it still must be scheduled). + // Note: Task de-duplication should happen in ExtraInfoFunc by + // returning false, typically by determining from the tx that the work + // exists already. The easy way is to have a unique joint index + // across all fields that will be common. + // Adder should typically only add its own task type, but multiple + // is possible for when 1 trigger starts 2 things. 
+ // Usage Example: + // func (b *BazType)Adder(addTask AddTaskFunc) { + // for { + // bazMaker := <- bazChannel + // addTask("baz", func(t harmonytask.TaskID, txn db.Transaction) (bool, error) { + // _, err := txn.Exec(`INSERT INTO bazInfoTable (taskID, qix, mot) + // VALUES ($1,$2,$3)`, id, bazMaker.qix, bazMaker.mot) + // if err != nil { + // scream(err) + // return false + // } + // return true + // }) + // } + // } + Adder(AddTaskFunc) +} + +// AddTaskFunc is responsible for adding a task's details "extra info" to the DB. +// It should return true if the task should be added, false if it was already there. +// This is typically accomplished with a "unique" index on your detals table that +// would cause the insert to fail. +// The error indicates that instead of a conflict (which we should ignore) that we +// actually have a serious problem that needs to be logged with context. +type AddTaskFunc func(extraInfo func(TaskID, *harmonydb.Tx) (shouldCommit bool, seriousError error)) + +type TaskEngine struct { + ctx context.Context + handlers []*taskTypeHandler + db *harmonydb.DB + reg *resources.Reg + grace context.CancelFunc + taskMap map[string]*taskTypeHandler + ownerID int + follows map[string][]followStruct + lastFollowTime time.Time + lastCleanup atomic.Value + hostAndPort string +} +type followStruct struct { + f func(TaskID, AddTaskFunc) (bool, error) + h *taskTypeHandler + name string +} + +type TaskID int + +// New creates all the task definitions. Note that TaskEngine +// knows nothing about the tasks themselves and serves to be a +// generic container for common work +func New( + db *harmonydb.DB, + impls []TaskInterface, + hostnameAndPort string) (*TaskEngine, error) { + + reg, err := resources.Register(db, hostnameAndPort) + if err != nil { + return nil, fmt.Errorf("cannot get resources: %w", err) + } + ctx, grace := context.WithCancel(context.Background()) + e := &TaskEngine{ + ctx: ctx, + grace: grace, + db: db, + reg: reg, + ownerID: reg.Resources.MachineID, // The current number representing "hostAndPort" + taskMap: make(map[string]*taskTypeHandler, len(impls)), + follows: make(map[string][]followStruct), + hostAndPort: hostnameAndPort, + } + e.lastCleanup.Store(time.Now()) + for _, c := range impls { + h := taskTypeHandler{ + TaskInterface: c, + TaskTypeDetails: c.TypeDetails(), + TaskEngine: e, + } + e.handlers = append(e.handlers, &h) + e.taskMap[h.TaskTypeDetails.Name] = &h + } + + // resurrect old work + { + var taskRet []struct { + ID int + Name string + } + + err := db.Select(e.ctx, &taskRet, `SELECT id, name from harmony_task WHERE owner_id=$1`, e.ownerID) + if err != nil { + return nil, err + } + for _, w := range taskRet { + // edge-case: if old assignments are not available tasks, unlock them. + h := e.taskMap[w.Name] + if h == nil { + _, err := db.Exec(e.ctx, `UPDATE harmony_task SET owner=NULL WHERE id=$1`, w.ID) + if err != nil { + log.Errorw("Cannot remove self from owner field", "error", err) + continue // not really fatal, but not great + } + } + if !h.considerWork("recovered", []TaskID{TaskID(w.ID)}) { + log.Error("Strange: Unable to accept previously owned task: ", w.ID, w.Name) + } + } + } + for _, h := range e.handlers { + go h.Adder(h.AddTask) + } + go e.poller() + + return e, nil +} + +// GracefullyTerminate hangs until all present tasks have completed. +// Call this to cleanly exit the process. As some processes are long-running, +// passing a deadline will ignore those still running (to be picked-up later). 
+func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { + e.grace() + e.reg.Shutdown() + deadlineChan := time.NewTimer(deadline).C +top: + for _, h := range e.handlers { + if h.Count.Load() > 0 { + select { + case <-deadlineChan: + return + default: + time.Sleep(time.Millisecond) + goto top + } + } + } +} + +func (e *TaskEngine) poller() { + for { + select { + case <-time.NewTicker(POLL_DURATION).C: // Find work periodically + case <-e.ctx.Done(): ///////////////////// Graceful exit + return + } + e.pollerTryAllWork() + if time.Since(e.lastFollowTime) > FOLLOW_FREQUENCY { + e.followWorkInDB() + } + } +} + +// followWorkInDB implements "Follows" +func (e *TaskEngine) followWorkInDB() { + // Step 1: What are we following? + var lastFollowTime time.Time + lastFollowTime, e.lastFollowTime = e.lastFollowTime, time.Now() + + for fromName, srcs := range e.follows { + var cList []int // Which work is done (that we follow) since we last checked? + err := e.db.Select(e.ctx, &cList, `SELECT h.task_id FROM harmony_task_history + WHERE h.work_end>$1 AND h.name=$2`, lastFollowTime, fromName) + if err != nil { + log.Error("Could not query DB: ", err) + return + } + for _, src := range srcs { + for _, workAlreadyDone := range cList { // Were any tasks made to follow these tasks? + var ct int + err := e.db.QueryRow(e.ctx, `SELECT COUNT(*) FROM harmony_task + WHERE name=$1 AND previous_task=$2`, src.h.Name, workAlreadyDone).Scan(&ct) + if err != nil { + log.Error("Could not query harmony_task: ", err) + return // not recoverable here + } + if ct > 0 { + continue + } + // we need to create this task + b, err := src.h.Follows[fromName](TaskID(workAlreadyDone), src.h.AddTask) + if err != nil { + log.Errorw("Could not follow: ", "error", err) + continue + } + if !b { + // But someone may have beaten us to it. + log.Debugf("Unable to add task %s following Task(%d, %s)", src.h.Name, workAlreadyDone, fromName) + } + } + } + } +} + +// pollerTryAllWork starts the next 1 task +func (e *TaskEngine) pollerTryAllWork() { + if time.Since(e.lastCleanup.Load().(time.Time)) > CLEANUP_FREQUENCY { + e.lastCleanup.Store(time.Now()) + resources.CleanupMachines(e.ctx, e.db) + } + for _, v := range e.handlers { + if v.AssertMachineHasCapacity() != nil { + continue + } + var unownedTasks []TaskID + err := e.db.Select(e.ctx, &unownedTasks, `SELECT id + FROM harmony_task + WHERE owner_id IS NULL AND name=$1 + ORDER BY update_time`, v.Name) + if err != nil { + log.Error("Unable to read work ", err) + continue + } + if len(unownedTasks) > 0 { + accepted := v.considerWork("poller", unownedTasks) + if accepted { + return // accept new work slowly and in priority order + } + log.Warn("Work not accepted for " + strconv.Itoa(len(unownedTasks)) + " " + v.Name + " task(s)") + } + } +} + +// ResourcesAvailable determines what resources are still unassigned. 
+func (e *TaskEngine) ResourcesAvailable() resources.Resources { + tmp := e.reg.Resources + for _, t := range e.handlers { + ct := t.Count.Load() + tmp.Cpu -= int(ct) * t.Cost.Cpu + tmp.Gpu -= float64(ct) * t.Cost.Gpu + tmp.Ram -= uint64(ct) * t.Cost.Ram + } + return tmp +} diff --git a/lib/harmony/harmonytask/task_type_handler.go b/lib/harmony/harmonytask/task_type_handler.go new file mode 100644 index 000000000..79a156fef --- /dev/null +++ b/lib/harmony/harmonytask/task_type_handler.go @@ -0,0 +1,230 @@ +package harmonytask + +import ( + "context" + "errors" + "fmt" + "runtime" + "strconv" + "sync/atomic" + "time" + + logging "github.com/ipfs/go-log/v2" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" +) + +var log = logging.Logger("harmonytask") + +type taskTypeHandler struct { + TaskInterface + TaskTypeDetails + TaskEngine *TaskEngine + Count atomic.Int32 +} + +func (h *taskTypeHandler) AddTask(extra func(TaskID, *harmonydb.Tx) (bool, error)) { + var tID TaskID + _, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) (bool, error) { + // create taskID (from DB) + _, err := tx.Exec(`INSERT INTO harmony_task (name, added_by, posted_time) + VALUES ($1, $2, CURRENT_TIMESTAMP) `, h.Name, h.TaskEngine.ownerID) + if err != nil { + return false, fmt.Errorf("could not insert into harmonyTask: %w", err) + } + err = tx.QueryRow("SELECT id FROM harmony_task ORDER BY update_time DESC LIMIT 1").Scan(&tID) + if err != nil { + return false, fmt.Errorf("Could not select ID: %v", err) + } + return extra(tID, tx) + }) + + if err != nil { + if harmonydb.IsErrUniqueContraint(err) { + log.Debugf("addtask(%s) saw unique constraint, so it's added already.", h.Name) + return + } + log.Error("Could not add task. AddTasFunc failed: %v", err) + return + } +} + +// considerWork is called to attempt to start work on a task-id of this task type. +// It presumes single-threaded calling, so there should not be a multi-threaded re-entry. +// The only caller should be the one work poller thread. This does spin off other threads, +// but those should not considerWork. Work completing may lower the resource numbers +// unexpectedly, but that will not invalidate work being already able to fit. +func (h *taskTypeHandler) considerWork(from string, ids []TaskID) (workAccepted bool) { +top: + if len(ids) == 0 { + return true // stop looking for takers + } + + // 1. Can we do any more of this task type? + // NOTE: 0 is the default value, so this way people don't need to worry about + // this setting unless they want to limit the number of tasks of this type. + if h.Max > 0 && int(h.Count.Load()) >= h.Max { + log.Debugw("did not accept task", "name", h.Name, "reason", "at max already") + return false + } + + // 2. Can we do any more work? From here onward, we presume the resource + // story will not change, so single-threaded calling is best. + err := h.AssertMachineHasCapacity() + if err != nil { + log.Debugw("did not accept task", "name", h.Name, "reason", "at capacity already: "+err.Error()) + return false + } + + // 3. What does the impl say? + tID, err := h.CanAccept(ids, h.TaskEngine) + if err != nil { + log.Error(err) + return false + } + if tID == nil { + log.Infow("did not accept task", "task_id", ids[0], "reason", "CanAccept() refused", "name", h.Name) + return false + } + + // 4. Can we claim the work for our hostname? 
+ ct, err := h.TaskEngine.db.Exec(h.TaskEngine.ctx, "UPDATE harmony_task SET owner_id=$1 WHERE id=$2 AND owner_id IS NULL", h.TaskEngine.ownerID, *tID) + if err != nil { + log.Error(err) + return false + } + if ct == 0 { + log.Infow("did not accept task", "task_id", strconv.Itoa(int(*tID)), "reason", "already Taken", "name", h.Name) + var tryAgain = make([]TaskID, 0, len(ids)-1) + for _, id := range ids { + if id != *tID { + tryAgain = append(tryAgain, id) + } + } + ids = tryAgain + goto top + } + + h.Count.Add(1) + go func() { + log.Infow("Beginning work on Task", "id", *tID, "from", from, "name", h.Name) + + var done bool + var doErr error + workStart := time.Now() + + defer func() { + if r := recover(); r != nil { + stackSlice := make([]byte, 4092) + sz := runtime.Stack(stackSlice, false) + log.Error("Recovered from a serious error "+ + "while processing "+h.Name+" task "+strconv.Itoa(int(*tID))+": ", r, + " Stack: ", string(stackSlice[:sz])) + } + h.Count.Add(-1) + + h.recordCompletion(*tID, workStart, done, doErr) + if done { + for _, fs := range h.TaskEngine.follows[h.Name] { // Do we know of any follows for this task type? + if _, err := fs.f(*tID, fs.h.AddTask); err != nil { + log.Error("Could not follow", "error", err, "from", h.Name, "to", fs.name) + } + } + } + }() + + done, doErr = h.Do(*tID, func() bool { + var owner int + // Background here because we don't want GracefulRestart to block this save. + err := h.TaskEngine.db.QueryRow(context.Background(), + `SELECT owner_id FROM harmony_task WHERE id=$1`, *tID).Scan(&owner) + if err != nil { + log.Error("Cannot determine ownership: ", err) + return false + } + return owner == h.TaskEngine.ownerID + }) + if doErr != nil { + log.Errorw("Do() returned error", "type", h.Name, "id", strconv.Itoa(int(*tID)), "error", doErr) + } + }() + return true +} + +func (h *taskTypeHandler) recordCompletion(tID TaskID, workStart time.Time, done bool, doErr error) { + workEnd := time.Now() + + cm, err := h.TaskEngine.db.BeginTransaction(h.TaskEngine.ctx, func(tx *harmonydb.Tx) (bool, error) { + var postedTime time.Time + err := tx.QueryRow(`SELECT posted_time FROM harmony_task WHERE id=$1`, tID).Scan(&postedTime) + if err != nil { + return false, fmt.Errorf("could not log completion: %w ", err) + } + result := "unspecified error" + if done { + _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) + if err != nil { + + return false, fmt.Errorf("could not log completion: %w", err) + } + result = "" + } else { + if doErr != nil { + result = "error: " + doErr.Error() + } + var deleteTask bool + if h.MaxFailures > 0 { + ct := uint(0) + err = tx.QueryRow(`SELECT count(*) FROM harmony_task_history + WHERE task_id=$1 AND result=FALSE`, tID).Scan(&ct) + if err != nil { + return false, fmt.Errorf("could not read task history: %w", err) + } + if ct >= h.MaxFailures { + deleteTask = true + } + } + if deleteTask { + _, err = tx.Exec("DELETE FROM harmony_task WHERE id=$1", tID) + if err != nil { + return false, fmt.Errorf("could not delete failed job: %w", err) + } + // Note: Extra Info is left laying around for later review & clean-up + } else { + _, err := tx.Exec(`UPDATE harmony_task SET owner_id=NULL WHERE id=$1`, tID) + if err != nil { + return false, fmt.Errorf("could not disown failed task: %v %v", tID, err) + } + } + } + _, err = tx.Exec(`INSERT INTO harmony_task_history + (task_id, name, posted, work_start, work_end, result, completed_by_host_and_port, err) +VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`, tID, h.Name, postedTime, workStart, workEnd, 
done, h.TaskEngine.hostAndPort, result) + if err != nil { + return false, fmt.Errorf("could not write history: %w", err) + } + return true, nil + }) + if err != nil { + log.Error("Could not record transaction: ", err) + return + } + if !cm { + log.Error("Committing the task records failed") + } +} + +func (h *taskTypeHandler) AssertMachineHasCapacity() error { + r := h.TaskEngine.ResourcesAvailable() + + if r.Cpu-h.Cost.Cpu < 0 { + return errors.New("Did not accept " + h.Name + " task: out of cpu") + } + if h.Cost.Ram > r.Ram { + return errors.New("Did not accept " + h.Name + " task: out of RAM") + } + if r.Gpu-h.Cost.Gpu < 0 { + return errors.New("Did not accept " + h.Name + " task: out of available GPU") + } + return nil +} diff --git a/lib/harmony/resources/getGPU.go b/lib/harmony/resources/getGPU.go new file mode 100644 index 000000000..9a73bcd0d --- /dev/null +++ b/lib/harmony/resources/getGPU.go @@ -0,0 +1,22 @@ +//go:build !darwin +// +build !darwin + +package resources + +import ( + "strings" + + ffi "github.com/filecoin-project/filecoin-ffi" +) + +func getGPUDevices() float64 { // GPU boolean + gpus, err := ffi.GetGPUDevices() + if err != nil { + logger.Errorf("getting gpu devices failed: %+v", err) + } + all := strings.ToLower(strings.Join(gpus, ",")) + if len(gpus) > 1 || strings.Contains(all, "ati") || strings.Contains(all, "nvidia") { + return float64(len(gpus)) + } + return 0 +} diff --git a/lib/harmony/resources/getGPU_darwin.go b/lib/harmony/resources/getGPU_darwin.go new file mode 100644 index 000000000..a9c0a33cd --- /dev/null +++ b/lib/harmony/resources/getGPU_darwin.go @@ -0,0 +1,8 @@ +//go:build darwin +// +build darwin + +package resources + +func getGPUDevices() float64 { + return 10000.0 // unserious value intended for non-production use. +} diff --git a/lib/harmony/resources/memsys.go b/lib/harmony/resources/memsys.go new file mode 100644 index 000000000..1a45b5b22 --- /dev/null +++ b/lib/harmony/resources/memsys.go @@ -0,0 +1,22 @@ +//go:build darwin || freebsd || openbsd || dragonfly || netbsd +// +build darwin freebsd openbsd dragonfly netbsd + +package resources + +import ( + "encoding/binary" + "syscall" +) + +func sysctlUint64(name string) (uint64, error) { + s, err := syscall.Sysctl(name) + if err != nil { + return 0, err + } + // hack because the string conversion above drops a \0 + b := []byte(s) + if len(b) < 8 { + b = append(b, 0) + } + return binary.LittleEndian.Uint64(b), nil +} diff --git a/lib/harmony/resources/miniopencl/cl.h b/lib/harmony/resources/miniopencl/cl.h new file mode 100644 index 000000000..e90fb7692 --- /dev/null +++ b/lib/harmony/resources/miniopencl/cl.h @@ -0,0 +1,17 @@ + +#ifndef CL_H +#define CL_H + +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS + +#define CL_TARGET_OPENCL_VERSION 300 + +#ifdef __APPLE__ +#include "OpenCL/opencl.h" +#else +#include "CL/opencl.h" +#endif + +#endif /* CL_H */ \ No newline at end of file diff --git a/lib/harmony/resources/miniopencl/mini_opencl.go b/lib/harmony/resources/miniopencl/mini_opencl.go new file mode 100644 index 000000000..d2486a88f --- /dev/null +++ b/lib/harmony/resources/miniopencl/mini_opencl.go @@ -0,0 +1,93 @@ +// Package cl was borrowed from the go-opencl library which is more complex and +// doesn't compile well for our needs. 
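A short sketch of how this trimmed binding can be driven; the helper name and aggregation loop are invented for illustration, but GetPlatforms, GetAllDevices, and GlobalMemSize are the calls defined below in this file:

// totalGPUMemory sums global memory across every visible OpenCL device.
func totalGPUMemory() (int64, error) {
    platforms, err := cl.GetPlatforms()
    if err != nil {
        return 0, err
    }
    var total int64
    for _, p := range platforms {
        devices, err := p.GetAllDevices()
        if err != nil {
            continue // skip platforms that cannot be queried
        }
        for _, d := range devices {
            total += d.GlobalMemSize()
        }
    }
    return total, nil
}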
+package cl + +// #include "cl.h" +import "C" +import ( + "fmt" + "unsafe" +) + +const maxPlatforms = 32 + +type Platform struct { + id C.cl_platform_id +} + +// Obtain the list of platforms available. +func GetPlatforms() ([]*Platform, error) { + var platformIds [maxPlatforms]C.cl_platform_id + var nPlatforms C.cl_uint + err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms) + if err == -1001 { // No platforms found + return nil, nil + } + if err != C.CL_SUCCESS { + return nil, toError(err) + } + platforms := make([]*Platform, nPlatforms) + for i := 0; i < int(nPlatforms); i++ { + platforms[i] = &Platform{id: platformIds[i]} + } + return platforms, nil +} + +const maxDeviceCount = 64 + +type DeviceType uint + +const ( + DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL +) + +type Device struct { + id C.cl_device_id +} + +func (p *Platform) GetAllDevices() ([]*Device, error) { + var deviceIds [maxDeviceCount]C.cl_device_id + var numDevices C.cl_uint + var platformId C.cl_platform_id + if p != nil { + platformId = p.id + } + if err := C.clGetDeviceIDs(platformId, C.cl_device_type(DeviceTypeAll), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS { + return nil, toError(err) + } + if numDevices > maxDeviceCount { + numDevices = maxDeviceCount + } + devices := make([]*Device, numDevices) + for i := 0; i < int(numDevices); i++ { + devices[i] = &Device{id: deviceIds[i]} + } + return devices, nil +} + +func toError(code C.cl_int) error { + return ErrOther(code) +} + +type ErrOther int + +func (e ErrOther) Error() string { + return fmt.Sprintf("OpenCL: error %d", int(e)) +} + +// Size of global device memory in bytes. +func (d *Device) GlobalMemSize() int64 { + val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true) + return val +} + +func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) { + var val C.cl_ulong + if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { + if panicOnError { + panic("Should never fail") + } + return 0, toError(err) + } + return int64(val), nil +} diff --git a/lib/harmony/resources/resources.go b/lib/harmony/resources/resources.go new file mode 100644 index 000000000..b129496d8 --- /dev/null +++ b/lib/harmony/resources/resources.go @@ -0,0 +1,142 @@ +package resources + +import ( + "bytes" + "context" + "os/exec" + "regexp" + "runtime" + "sync/atomic" + "time" + + logging "github.com/ipfs/go-log/v2" + "github.com/pbnjay/memory" + "golang.org/x/sys/unix" + "golang.org/x/xerrors" + + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" +) + +var LOOKS_DEAD_TIMEOUT = 10 * time.Minute // Time w/o minute heartbeats + +type Resources struct { + Cpu int + Gpu float64 + Ram uint64 + MachineID int +} +type Reg struct { + Resources + shutdown atomic.Bool +} + +var logger = logging.Logger("harmonytask") + +var lotusRE = regexp.MustCompile("lotus-worker|lotus-harmony|yugabyted|yb-master|yb-tserver") + +func Register(db *harmonydb.DB, hostnameAndPort string) (*Reg, error) { + var reg Reg + var err error + reg.Resources, err = getResources() + if err != nil { + return nil, err + } + ctx := context.Background() + { // Learn our owner_id while updating harmony_machines + var ownerID *int + + // Upsert query with last_contact update, fetch the machine ID + // (note this isn't a simple insert .. 
on conflict because host_and_port isn't unique) + err := db.QueryRow(ctx, ` + WITH upsert AS ( + UPDATE harmony_machines + SET cpu = $2, ram = $3, gpu = $4, last_contact = CURRENT_TIMESTAMP + WHERE host_and_port = $1 + RETURNING id + ), + inserted AS ( + INSERT INTO harmony_machines (host_and_port, cpu, ram, gpu, last_contact) + SELECT $1, $2, $3, $4, CURRENT_TIMESTAMP + WHERE NOT EXISTS (SELECT id FROM upsert) + RETURNING id + ) + SELECT id FROM upsert + UNION ALL + SELECT id FROM inserted; + `, hostnameAndPort, reg.Cpu, reg.Ram, reg.Gpu).Scan(&ownerID) + if err != nil { + return nil, xerrors.Errorf("inserting machine entry: %w", err) + } + if ownerID == nil { + return nil, xerrors.Errorf("no owner id") + } + + reg.MachineID = *ownerID + + cleaned := CleanupMachines(context.Background(), db) + logger.Infow("Cleaned up machines", "count", cleaned) + } + go func() { + for { + time.Sleep(time.Minute) + if reg.shutdown.Load() { + return + } + _, err := db.Exec(ctx, `UPDATE harmony_machines SET last_contact=CURRENT_TIMESTAMP`) + if err != nil { + logger.Error("Cannot keepalive ", err) + } + } + }() + + return ®, nil +} + +func CleanupMachines(ctx context.Context, db *harmonydb.DB) int { + ct, err := db.Exec(ctx, + `DELETE FROM harmony_machines WHERE last_contact < CURRENT_TIMESTAMP - INTERVAL '1 MILLISECOND' * $1 `, + LOOKS_DEAD_TIMEOUT.Milliseconds()) // ms enables unit testing to change timeout. + if err != nil { + logger.Warn("unable to delete old machines: ", err) + } + return ct +} + +func (res *Reg) Shutdown() { + res.shutdown.Store(true) +} + +func getResources() (res Resources, err error) { + b, err := exec.Command(`ps`, `-ef`).CombinedOutput() + if err != nil { + logger.Warn("Could not safety check for 2+ processes: ", err) + } else { + found := 0 + for _, b := range bytes.Split(b, []byte("\n")) { + if lotusRE.Match(b) { + found++ + } + } + if found > 1 { + logger.Warn("lotus-provider's defaults are for running alone. Use task maximums or CGroups.") + } + } + + res = Resources{ + Cpu: runtime.NumCPU(), + Ram: memory.FreeMemory(), + Gpu: getGPUDevices(), + } + + return res, nil +} + +func DiskFree(path string) (uint64, error) { + s := unix.Statfs_t{} + err := unix.Statfs(path, &s) + if err != nil { + return 0, err + } + + return s.Bfree * uint64(s.Bsize), nil +} diff --git a/lib/harmony/taskhelp/common.go b/lib/harmony/taskhelp/common.go new file mode 100644 index 000000000..eaeb4a1bf --- /dev/null +++ b/lib/harmony/taskhelp/common.go @@ -0,0 +1,19 @@ +package taskhelp + +// SubsetIf returns a subset of the slice for which the predicate is true. +// It does not allocate memory, but rearranges the list in place. +// A non-zero list input will always return a non-zero list. +// The return value is the subset and a boolean indicating whether the subset was sliced. 
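+//
+// Example (illustrative only): keeping the even values of a slice. The call
+// reuses the input's backing array, so the original ordering of the input is
+// not preserved afterwards:
+//
+//	evens, ok := SliceIfFound([]int{1, 2, 3, 4}, func(v int) bool { return v%2 == 0 })
+//	// ok == true, evens == []int{2, 4}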
+func SliceIfFound[T any](slice []T, f func(T) bool) ([]T, bool) { + ct := 0 + for i, v := range slice { + if f(v) { + slice[ct], slice[i] = slice[i], slice[ct] + ct++ + } + } + if ct == 0 { + return slice, false + } + return slice[:ct], true +} diff --git a/lib/promise/promise.go b/lib/promise/promise.go new file mode 100644 index 000000000..9b6a5e2b0 --- /dev/null +++ b/lib/promise/promise.go @@ -0,0 +1,47 @@ +package promise + +import ( + "context" + "sync" +) + +type Promise[T any] struct { + val T + done chan struct{} + mu sync.Mutex +} + +func (p *Promise[T]) Set(val T) { + p.mu.Lock() + defer p.mu.Unlock() + + // Set value + p.val = val + + // Initialize the done channel if it hasn't been initialized + if p.done == nil { + p.done = make(chan struct{}) + } + + // Signal that the value is set + close(p.done) +} + +func (p *Promise[T]) Val(ctx context.Context) T { + p.mu.Lock() + // Initialize the done channel if it hasn't been initialized + if p.done == nil { + p.done = make(chan struct{}) + } + p.mu.Unlock() + + select { + case <-ctx.Done(): + return *new(T) + case <-p.done: + p.mu.Lock() + val := p.val + p.mu.Unlock() + return val + } +} diff --git a/lib/promise/promise_test.go b/lib/promise/promise_test.go new file mode 100644 index 000000000..c2e9b656e --- /dev/null +++ b/lib/promise/promise_test.go @@ -0,0 +1,65 @@ +package promise + +import ( + "context" + "sync" + "testing" + "time" +) + +func TestPromiseSet(t *testing.T) { + p := &Promise[int]{} + + p.Set(42) + if p.val != 42 { + t.Fatalf("expected 42, got %v", p.val) + } +} + +func TestPromiseVal(t *testing.T) { + p := &Promise[int]{} + + p.Set(42) + + ctx := context.Background() + val := p.Val(ctx) + + if val != 42 { + t.Fatalf("expected 42, got %v", val) + } +} + +func TestPromiseValWaitsForSet(t *testing.T) { + p := &Promise[int]{} + var val int + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + ctx := context.Background() + val = p.Val(ctx) + }() + + time.Sleep(100 * time.Millisecond) // Give some time for the above goroutine to execute + p.Set(42) + wg.Wait() + + if val != 42 { + t.Fatalf("expected 42, got %v", val) + } +} + +func TestPromiseValContextCancel(t *testing.T) { + p := &Promise[int]{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel the context + + val := p.Val(ctx) + + var zeroValue int + if val != zeroValue { + t.Fatalf("expected zero-value, got %v", val) + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go index f20814e91..5d6ba79ec 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -705,46 +705,55 @@ var ( } ) +var views = []*view.View{ + InfoView, + PeerCountView, + APIRequestDurationView, + + GraphsyncReceivingPeersCountView, + GraphsyncReceivingActiveCountView, + GraphsyncReceivingCountCountView, + GraphsyncReceivingTotalMemoryAllocatedView, + GraphsyncReceivingTotalPendingAllocationsView, + GraphsyncReceivingPeersPendingView, + GraphsyncSendingPeersCountView, + GraphsyncSendingActiveCountView, + GraphsyncSendingCountCountView, + GraphsyncSendingTotalMemoryAllocatedView, + GraphsyncSendingTotalPendingAllocationsView, + GraphsyncSendingPeersPendingView, + + RcmgrAllowConnView, + RcmgrBlockConnView, + RcmgrAllowStreamView, + RcmgrBlockStreamView, + RcmgrAllowPeerView, + RcmgrBlockPeerView, + RcmgrAllowProtoView, + RcmgrBlockProtoView, + RcmgrBlockProtoPeerView, + RcmgrAllowSvcView, + RcmgrBlockSvcView, + RcmgrBlockSvcPeerView, + RcmgrAllowMemView, + RcmgrBlockMemView, +} + // DefaultViews is an array of OpenCensus views for metric 
gathering purposes var DefaultViews = func() []*view.View { - views := []*view.View{ - InfoView, - PeerCountView, - APIRequestDurationView, - - GraphsyncReceivingPeersCountView, - GraphsyncReceivingActiveCountView, - GraphsyncReceivingCountCountView, - GraphsyncReceivingTotalMemoryAllocatedView, - GraphsyncReceivingTotalPendingAllocationsView, - GraphsyncReceivingPeersPendingView, - GraphsyncSendingPeersCountView, - GraphsyncSendingActiveCountView, - GraphsyncSendingCountCountView, - GraphsyncSendingTotalMemoryAllocatedView, - GraphsyncSendingTotalPendingAllocationsView, - GraphsyncSendingPeersPendingView, - - RcmgrAllowConnView, - RcmgrBlockConnView, - RcmgrAllowStreamView, - RcmgrBlockStreamView, - RcmgrAllowPeerView, - RcmgrBlockPeerView, - RcmgrAllowProtoView, - RcmgrBlockProtoView, - RcmgrBlockProtoPeerView, - RcmgrAllowSvcView, - RcmgrBlockSvcView, - RcmgrBlockSvcPeerView, - RcmgrAllowMemView, - RcmgrBlockMemView, - } - views = append(views, blockstore.DefaultViews...) - views = append(views, rpcmetrics.DefaultViews...) return views }() +// RegisterViews adds views to the default list without modifying this file. +func RegisterViews(v ...*view.View) { + views = append(views, v...) +} + +func init() { + RegisterViews(blockstore.DefaultViews...) + RegisterViews(rpcmetrics.DefaultViews...) +} + var ChainNodeViews = append([]*view.View{ ChainNodeHeightView, ChainNodeHeightExpectedView, diff --git a/metrics/proxy/proxy.go b/metrics/proxy/proxy.go index dbadf255f..6885adfe9 100644 --- a/metrics/proxy/proxy.go +++ b/metrics/proxy/proxy.go @@ -10,6 +10,12 @@ import ( "github.com/filecoin-project/lotus/metrics" ) +func MetricedAPI[T, P any](a T) *P { + var out P + proxy(a, &out) + return &out +} + func MetricedStorMinerAPI(a api.StorageMiner) api.StorageMiner { var out api.StorageMinerStruct proxy(a, &out) diff --git a/miner/miner.go b/miner/miner.go index 329f34171..d11e9d4aa 100644 --- a/miner/miner.go +++ b/miner/miner.go @@ -71,7 +71,7 @@ func NewMiner(api v1api.FullNode, epp gen.WinningPoStProver, addr address.Addres api: api, epp: epp, address: addr, - waitFunc: func(ctx context.Context, baseTime uint64) (func(bool, abi.ChainEpoch, error), abi.ChainEpoch, error) { + propagationWaitFunc: func(ctx context.Context, baseTime uint64) (func(bool, abi.ChainEpoch, error), abi.ChainEpoch, error) { // wait around for half the block time in case other parents come in // // if we're mining a block in the past via catch-up/rush mining, @@ -114,7 +114,7 @@ type Miner struct { stop chan struct{} stopping chan struct{} - waitFunc waitFunc + propagationWaitFunc waitFunc // lastWork holds the last MiningBase we built upon. lastWork *MiningBase @@ -205,15 +205,21 @@ func (m *Miner) mine(ctx context.Context) { ctx, span := trace.StartSpan(ctx, "/mine") defer span.End() + // Perform the Winning PoSt warmup in a separate goroutine. go m.doWinPoStWarmup(ctx) var lastBase MiningBase + + // Start the main mining loop. minerLoop: for { + // Prepare a context for a single node operation. ctx := cliutil.OnSingleNode(ctx) + // Handle stop signals. select { case <-m.stop: + // If a stop signal is received, clean up and exit the mining loop. stopping := m.stopping m.stop = nil m.stopping = nil @@ -223,10 +229,11 @@ minerLoop: default: } - var base *MiningBase + var base *MiningBase // NOTE: This points to m.lastWork; Incrementing nulls here will increment it there. var onDone func(bool, abi.ChainEpoch, error) var injectNulls abi.ChainEpoch + // Look for the best mining candidate. 
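+		// The loop below repeatedly asks GetBestMiningCandidate for a base tipset,
+		// waits out the propagation delay, and only breaks once the candidate is
+		// stable (same height and null rounds as in the previous iteration) and the
+		// required beacon entry is available.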
for { prebase, err := m.GetBestMiningCandidate(ctx) if err != nil { @@ -237,6 +244,7 @@ minerLoop: continue } + // Check if we have a new base or if the current base is still valid. if base != nil && base.TipSet.Height() == prebase.TipSet.Height() && base.NullRounds == prebase.NullRounds { base = prebase break @@ -253,13 +261,13 @@ minerLoop: // best mining candidate at that time. // Wait until propagation delay period after block we plan to mine on - onDone, injectNulls, err = m.waitFunc(ctx, prebase.TipSet.MinTimestamp()) + onDone, injectNulls, err = m.propagationWaitFunc(ctx, prebase.TipSet.MinTimestamp()) if err != nil { log.Error(err) continue } - // just wait for the beacon entry to become available before we select our final mining base + // Ensure the beacon entry is available before finalizing the mining base. _, err = m.api.StateGetBeaconEntry(ctx, prebase.TipSet.Height()+prebase.NullRounds+1) if err != nil { log.Errorf("failed getting beacon entry: %s", err) @@ -272,8 +280,9 @@ minerLoop: base = prebase } - base.NullRounds += injectNulls // testing + base.NullRounds += injectNulls // Adjust for testing purposes. + // Check for repeated mining candidates and handle sleep for the next round. if base.TipSet.Equals(lastBase.TipSet) && lastBase.NullRounds == base.NullRounds { log.Warnf("BestMiningCandidate from the previous round: %s (nulls:%d)", lastBase.TipSet.Cids(), lastBase.NullRounds) if !m.niceSleep(time.Duration(build.BlockDelaySecs) * time.Second) { @@ -282,6 +291,7 @@ minerLoop: continue } + // Attempt to mine a block. b, err := m.mineOne(ctx, base) if err != nil { log.Errorf("mining block failed: %+v", err) @@ -299,9 +309,12 @@ minerLoop: } onDone(b != nil, h, nil) + // Process the mined block. if b != nil { + // Record the event of mining a block. m.journal.RecordEvent(m.evtTypes[evtTypeBlockMined], func() interface{} { return map[string]interface{}{ + // Data about the mined block. "parents": base.TipSet.Cids(), "nulls": base.NullRounds, "epoch": b.Header.Height, @@ -312,19 +325,23 @@ minerLoop: btime := time.Unix(int64(b.Header.Timestamp), 0) now := build.Clock.Now() + // Handle timing for broadcasting the block. switch { case btime == now: // block timestamp is perfectly aligned with time. case btime.After(now): + // Wait until it's time to broadcast the block. if !m.niceSleep(build.Clock.Until(btime)) { log.Warnf("received interrupt while waiting to broadcast block, will shutdown after block is sent out") build.Clock.Sleep(build.Clock.Until(btime)) } default: + // Log if the block was mined in the past. log.Warnw("mined block in the past", "block-time", btime, "time", build.Clock.Now(), "difference", build.Clock.Since(btime)) } + // Check for slash filter conditions. if os.Getenv("LOTUS_MINER_NO_SLASHFILTER") != "_yes_i_know_i_can_and_probably_will_lose_all_my_fil_and_power_" && !build.IsNearUpgrade(base.TipSet.Height(), build.UpgradeWatermelonFixHeight) { witness, fault, err := m.sf.MinedBlock(ctx, b.Header, base.TipSet.Height()+base.NullRounds) if err != nil { @@ -339,25 +356,27 @@ minerLoop: } } + // Check for blocks created at the same height. if _, ok := m.minedBlockHeights.Get(b.Header.Height); ok { log.Warnw("Created a block at the same height as another block we've created", "height", b.Header.Height, "miner", b.Header.Miner, "parents", b.Header.Parents) continue } + // Add the block height to the mined block heights. m.minedBlockHeights.Add(b.Header.Height, true) + // Submit the newly mined block. 
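+			// A failed submission is only logged; the loop moves on to the next round
+			// rather than retrying the same block.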
if err := m.api.SyncSubmitBlock(ctx, b); err != nil { log.Errorf("failed to submit newly mined block: %+v", err) } } else { + // If no block was mined, increase the null rounds and wait for the next epoch. base.NullRounds++ - // Wait until the next epoch, plus the propagation delay, so a new tipset - // has enough time to form. - // - // See: https://github.com/filecoin-project/lotus/issues/1845 + // Calculate the time for the next round. nextRound := time.Unix(int64(base.TipSet.MinTimestamp()+build.BlockDelaySecs*uint64(base.NullRounds))+int64(build.PropagationDelaySecs), 0) + // Wait for the next round or stop signal. select { case <-build.Clock.After(build.Clock.Until(nextRound)): case <-m.stop: diff --git a/miner/testminer.go b/miner/testminer.go index f1d11bae0..e23b26ae2 100644 --- a/miner/testminer.go +++ b/miner/testminer.go @@ -28,13 +28,13 @@ func NewTestMiner(nextCh <-chan MineReq, addr address.Address) func(v1api.FullNo } m := &Miner{ - api: api, - waitFunc: chanWaiter(nextCh), - epp: epp, - minedBlockHeights: arc, - address: addr, - sf: slashfilter.New(ds.NewMapDatastore()), - journal: journal.NilJournal(), + api: api, + propagationWaitFunc: chanWaiter(nextCh), + epp: epp, + minedBlockHeights: arc, + address: addr, + sf: slashfilter.New(ds.NewMapDatastore()), + journal: journal.NilJournal(), } if err := m.Start(context.TODO()); err != nil { diff --git a/node/builder_miner.go b/node/builder_miner.go index d9c92422d..08c71ba19 100644 --- a/node/builder_miner.go +++ b/node/builder_miner.go @@ -19,6 +19,7 @@ import ( "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/gen" "github.com/filecoin-project/lotus/chain/gen/slashfilter" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" "github.com/filecoin-project/lotus/markets/dagstore" "github.com/filecoin-project/lotus/markets/dealfilter" "github.com/filecoin-project/lotus/markets/idxprov" @@ -102,6 +103,7 @@ func ConfigStorageMiner(c interface{}) Option { If(cfg.Subsystems.EnableSealing, Error(xerrors.Errorf("sealing can only be enabled on a mining node"))), If(cfg.Subsystems.EnableSectorStorage, Error(xerrors.Errorf("sealing can only be enabled on a mining node"))), ), + If(cfg.Subsystems.EnableMining, If(!cfg.Subsystems.EnableSealing, Error(xerrors.Errorf("sealing can't be disabled on a mining node yet"))), If(!cfg.Subsystems.EnableSectorStorage, Error(xerrors.Errorf("sealing can't be disabled on a mining node yet"))), @@ -116,19 +118,35 @@ func ConfigStorageMiner(c interface{}) Option { // Mining / proving Override(new(*slashfilter.SlashFilter), modules.NewSlashFilter), - Override(new(*miner.Miner), modules.SetupBlockProducer), - Override(new(gen.WinningPoStProver), storage.NewWinningPoStProver), + + If(!cfg.Subsystems.DisableWinningPoSt, + Override(new(*miner.Miner), modules.SetupBlockProducer), + Override(new(gen.WinningPoStProver), storage.NewWinningPoStProver), + ), + Override(PreflightChecksKey, modules.PreflightChecks), Override(new(*sealing.Sealing), modules.SealingPipeline(cfg.Fees)), - Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(cfg.Fees, cfg.Proving)), + If(!cfg.Subsystems.DisableWindowPoSt, Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(cfg.Fees, cfg.Proving))), Override(new(sectorblocks.SectorBuilder), From(new(*sealing.Sealing))), ), If(cfg.Subsystems.EnableSectorStorage, // Sector storage - Override(new(*paths.Index), paths.NewIndex), - Override(new(paths.SectorIndex), From(new(*paths.Index))), + 
If(cfg.Subsystems.EnableSectorIndexDB, + Override(new(*paths.DBIndex), paths.NewDBIndex), + Override(new(paths.SectorIndex), From(new(*paths.DBIndex))), + + // sector index db is the only thing on lotus-miner that will use harmonydb + Override(new(*harmonydb.DB), func(cfg config.HarmonyDB, id harmonydb.ITestID) (*harmonydb.DB, error) { + return harmonydb.NewFromConfigWithITestID(cfg)(id) + }), + ), + If(!cfg.Subsystems.EnableSectorIndexDB, + Override(new(*paths.MemIndex), paths.NewMemIndex), + Override(new(paths.SectorIndex), From(new(*paths.MemIndex))), + ), + Override(new(*sectorstorage.Manager), modules.SectorStorage), Override(new(sectorstorage.Unsealer), From(new(*sectorstorage.Manager))), Override(new(sectorstorage.SectorManager), From(new(*sectorstorage.Manager))), @@ -230,6 +248,8 @@ func ConfigStorageMiner(c interface{}) Option { Override(new(config.SealerConfig), cfg.Storage), Override(new(config.ProvingConfig), cfg.Proving), + Override(new(config.HarmonyDB), cfg.HarmonyDB), + Override(new(harmonydb.ITestID), harmonydb.ITestID("")), Override(new(*ctladdr.AddressSelector), modules.AddressSelector(&cfg.Addresses)), ) } diff --git a/node/config/cfgdocgen/gen.go b/node/config/cfgdocgen/gen.go index 577e85f9d..b13b7d799 100644 --- a/node/config/cfgdocgen/gen.go +++ b/node/config/cfgdocgen/gen.go @@ -104,7 +104,7 @@ var Doc = map[string][]DocField{ for _, typeName := range outt { typ := out[typeName] - fmt.Printf("\t\"%s\": []DocField{\n", typeName) + fmt.Printf("\t\"%s\": {\n", typeName) for _, f := range typ { fmt.Println("\t\t{") diff --git a/node/config/def.go b/node/config/def.go index 47c0df98f..dc358b140 100644 --- a/node/config/def.go +++ b/node/config/def.go @@ -238,6 +238,7 @@ func DefaultStorageMiner() *StorageMiner { EnableSealing: true, EnableSectorStorage: true, EnableMarkets: false, + EnableSectorIndexDB: false, }, Fees: MinerFeeConfig{ @@ -274,6 +275,13 @@ func DefaultStorageMiner() *StorageMiner { MaxConcurrentUnseals: 5, GCInterval: Duration(1 * time.Minute), }, + HarmonyDB: HarmonyDB{ + Hosts: []string{"127.0.0.1"}, + Username: "yugabyte", + Password: "yugabyte", + Database: "yugabyte", + Port: "5433", + }, } cfg.Common.API.ListenAddress = "/ip4/127.0.0.1/tcp/2345/http" @@ -340,3 +348,37 @@ func DefaultUserRaftConfig() *UserRaftConfig { return &cfg } + +func DefaultLotusProvider() *LotusProviderConfig { + return &LotusProviderConfig{ + Subsystems: ProviderSubsystemsConfig{}, + Fees: LotusProviderFees{ + DefaultMaxFee: DefaultDefaultMaxFee, + MaxPreCommitGasFee: types.MustParseFIL("0.025"), + MaxCommitGasFee: types.MustParseFIL("0.05"), + + MaxPreCommitBatchGasFee: BatchFeeConfig{ + Base: types.MustParseFIL("0"), + PerSector: types.MustParseFIL("0.02"), + }, + MaxCommitBatchGasFee: BatchFeeConfig{ + Base: types.MustParseFIL("0"), + PerSector: types.MustParseFIL("0.03"), // enough for 6 agg and 1nFIL base fee + }, + + MaxTerminateGasFee: types.MustParseFIL("0.5"), + MaxWindowPoStGasFee: types.MustParseFIL("5"), + MaxPublishDealsFee: types.MustParseFIL("0.05"), + }, + Addresses: LotusProviderAddresses{ + PreCommitControl: []string{}, + CommitControl: []string{}, + TerminateControl: []string{}, + }, + Proving: ProvingConfig{ + ParallelCheckLimit: 32, + PartitionCheckTimeout: Duration(20 * time.Minute), + SingleCheckTimeout: Duration(10 * time.Minute), + }, + } +} diff --git a/node/config/def_test.go b/node/config/def_test.go index 1739339a2..627b65a56 100644 --- a/node/config/def_test.go +++ b/node/config/def_test.go @@ -71,6 +71,12 @@ func TestDefaultMinerRoundtrip(t 
*testing.T) { fmt.Println(s) + // Differs between test envs + c.HarmonyDB = HarmonyDB{} + c2.(*StorageMiner).HarmonyDB = HarmonyDB{} + + fmt.Println(c) + fmt.Println(c2) require.True(t, reflect.DeepEqual(c, c2)) } diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index 5a5bd4a8c..13ecb2706 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -9,7 +9,7 @@ type DocField struct { } var Doc = map[string][]DocField{ - "API": []DocField{ + "API": { { Name: "ListenAddress", Type: "string", @@ -29,7 +29,23 @@ var Doc = map[string][]DocField{ Comment: ``, }, }, - "Backup": []DocField{ + "ApisConfig": { + { + Name: "ChainApiInfo", + Type: "[]string", + + Comment: `ChainApiInfo is the API endpoint for the Lotus daemon.`, + }, + { + Name: "StorageRPCSecret", + Type: "string", + + Comment: `RPC Secret for the storage subsystem. +If integrating with lotus-miner this must match the value from +cat ~/.lotusminer/keystore/MF2XI2BNNJ3XILLQOJUXMYLUMU | jq -r .PrivateKey`, + }, + }, + "Backup": { { Name: "DisableMetadataLog", Type: "bool", @@ -41,7 +57,7 @@ Note that in case of metadata corruption it might be much harder to recover your node if metadata log is disabled`, }, }, - "BatchFeeConfig": []DocField{ + "BatchFeeConfig": { { Name: "Base", Type: "types.FIL", @@ -55,7 +71,7 @@ your node if metadata log is disabled`, Comment: ``, }, }, - "Chainstore": []DocField{ + "Chainstore": { { Name: "EnableSplitstore", Type: "bool", @@ -69,7 +85,7 @@ your node if metadata log is disabled`, Comment: ``, }, }, - "Client": []DocField{ + "Client": { { Name: "UseIpfs", Type: "bool", @@ -117,7 +133,7 @@ without existing payment channels with available funds will fail instead of automatically performing on-chain operations.`, }, }, - "Common": []DocField{ + "Common": { { Name: "API", Type: "API", @@ -149,7 +165,7 @@ of automatically performing on-chain operations.`, Comment: ``, }, }, - "DAGStoreConfig": []DocField{ + "DAGStoreConfig": { { Name: "RootDir", Type: "string", @@ -206,7 +222,7 @@ representation, e.g. 1m, 5m, 1h. Default value: 1 minute.`, }, }, - "DealmakingConfig": []DocField{ + "DealmakingConfig": { { Name: "ConsiderOnlineStorageDeals", Type: "bool", @@ -341,7 +357,7 @@ see https://lotus.filecoin.io/storage-providers/advanced-configurations/market/# Comment: ``, }, }, - "Events": []DocField{ + "Events": { { Name: "DisableRealTimeFilterAPI", Type: "bool", @@ -394,7 +410,7 @@ the database must already exist and be writeable. If a relative path is provided relative to the CWD (current working directory).`, }, }, - "FaultReporterConfig": []DocField{ + "FaultReporterConfig": { { Name: "EnableConsensusFaultReporter", Type: "bool", @@ -423,7 +439,7 @@ ReportConsensusFault messages. It will pay for gas fees, and receive any rewards. This address should have adequate funds to cover gas fees.`, }, }, - "FeeConfig": []DocField{ + "FeeConfig": { { Name: "DefaultMaxFee", Type: "types.FIL", @@ -431,7 +447,7 @@ rewards. This address should have adequate funds to cover gas fees.`, Comment: ``, }, }, - "FevmConfig": []DocField{ + "FevmConfig": { { Name: "EnableEthRPC", Type: "bool", @@ -453,7 +469,7 @@ Set to 0 to keep all mappings`, Comment: ``, }, }, - "FullNode": []DocField{ + "FullNode": { { Name: "Client", Type: "Client", @@ -503,7 +519,40 @@ Set to 0 to keep all mappings`, Comment: ``, }, }, - "IndexConfig": []DocField{ + "HarmonyDB": { + { + Name: "Hosts", + Type: "[]string", + + Comment: `HOSTS is a list of hostnames to nodes running YugabyteDB +in a cluster. 
Only 1 is required`, + }, + { + Name: "Username", + Type: "string", + + Comment: `The Yugabyte server's username with full credentials to operate on Lotus' Database. Blank for default.`, + }, + { + Name: "Password", + Type: "string", + + Comment: `The password for the related username. Blank for default.`, + }, + { + Name: "Database", + Type: "string", + + Comment: `The database (logical partition) within Yugabyte. Blank for default.`, + }, + { + Name: "Port", + Type: "string", + + Comment: `The port to find Yugabyte. Blank for default.`, + }, + }, + "IndexConfig": { { Name: "EnableMsgIndex", Type: "bool", @@ -512,7 +561,7 @@ Set to 0 to keep all mappings`, EnableMsgIndex enables indexing of messages on chain.`, }, }, - "IndexProviderConfig": []DocField{ + "IndexProviderConfig": { { Name: "Enable", Type: "bool", @@ -557,7 +606,15 @@ starts. By default, the cache is rehydrated from previously cached entries store datastore if any is present.`, }, }, - "Libp2p": []DocField{ + "JournalConfig": { + { + Name: "DisabledEvents", + Type: "string", + + Comment: `Events of the form: "system1:event1,system1:event2[,...]"`, + }, + }, + "Libp2p": { { Name: "ListenAddresses", Type: "[]string", @@ -624,7 +681,7 @@ count towards this limit.`, closed by the connection manager.`, }, }, - "Logging": []DocField{ + "Logging": { { Name: "SubsystemLevels", Type: "map[string]string", @@ -632,7 +689,137 @@ closed by the connection manager.`, Comment: `SubsystemLevels specify per-subsystem log levels`, }, }, - "MinerAddressConfig": []DocField{ + "LotusProviderAddresses": { + { + Name: "PreCommitControl", + Type: "[]string", + + Comment: `Addresses to send PreCommit messages from`, + }, + { + Name: "CommitControl", + Type: "[]string", + + Comment: `Addresses to send Commit messages from`, + }, + { + Name: "TerminateControl", + Type: "[]string", + + Comment: ``, + }, + { + Name: "DisableOwnerFallback", + Type: "bool", + + Comment: `DisableOwnerFallback disables usage of the owner address for messages +sent automatically`, + }, + { + Name: "DisableWorkerFallback", + Type: "bool", + + Comment: `DisableWorkerFallback disables usage of the worker address for messages +sent automatically, if control addresses are configured. 
+A control address that doesn't have enough funds will still be chosen +over the worker address if this flag is set.`, + }, + { + Name: "MinerAddresses", + Type: "[]string", + + Comment: `MinerAddresses are the addresses of the miner actors to use for sending messages`, + }, + }, + "LotusProviderConfig": { + { + Name: "Subsystems", + Type: "ProviderSubsystemsConfig", + + Comment: ``, + }, + { + Name: "Fees", + Type: "LotusProviderFees", + + Comment: ``, + }, + { + Name: "Addresses", + Type: "LotusProviderAddresses", + + Comment: ``, + }, + { + Name: "Proving", + Type: "ProvingConfig", + + Comment: ``, + }, + { + Name: "Journal", + Type: "JournalConfig", + + Comment: ``, + }, + { + Name: "Apis", + Type: "ApisConfig", + + Comment: ``, + }, + }, + "LotusProviderFees": { + { + Name: "DefaultMaxFee", + Type: "types.FIL", + + Comment: ``, + }, + { + Name: "MaxPreCommitGasFee", + Type: "types.FIL", + + Comment: ``, + }, + { + Name: "MaxCommitGasFee", + Type: "types.FIL", + + Comment: ``, + }, + { + Name: "MaxPreCommitBatchGasFee", + Type: "BatchFeeConfig", + + Comment: `maxBatchFee = maxBase + maxPerSector * nSectors`, + }, + { + Name: "MaxCommitBatchGasFee", + Type: "BatchFeeConfig", + + Comment: ``, + }, + { + Name: "MaxTerminateGasFee", + Type: "types.FIL", + + Comment: ``, + }, + { + Name: "MaxWindowPoStGasFee", + Type: "types.FIL", + + Comment: `WindowPoSt is a high-value operation, so the default fee should be high.`, + }, + { + Name: "MaxPublishDealsFee", + Type: "types.FIL", + + Comment: ``, + }, + }, + "MinerAddressConfig": { { Name: "PreCommitControl", Type: "[]string", @@ -674,7 +861,7 @@ A control address that doesn't have enough funds will still be chosen over the worker address if this flag is set.`, }, }, - "MinerFeeConfig": []DocField{ + "MinerFeeConfig": { { Name: "MaxPreCommitGasFee", Type: "types.FIL", @@ -730,7 +917,7 @@ over the worker address if this flag is set.`, Comment: ``, }, }, - "MinerSubsystemConfig": []DocField{ + "MinerSubsystemConfig": { { Name: "EnableMining", Type: "bool", @@ -755,6 +942,14 @@ over the worker address if this flag is set.`, Comment: ``, }, + { + Name: "EnableSectorIndexDB", + Type: "bool", + + Comment: `When enabled, the sector index will reside in an external database +as opposed to the local KV store in the miner process +This is useful to allow workers to bypass the lotus miner to access sector information`, + }, { Name: "SealerApiInfo", Type: "string", @@ -767,8 +962,59 @@ over the worker address if this flag is set.`, Comment: ``, }, + { + Name: "DisableWindowPoSt", + Type: "bool", + + Comment: `When window post is enabled, the miner will automatically submit window post proofs +for all sectors that are eligible for window post +IF WINDOW POST IS DISABLED, THE MINER WILL NOT SUBMIT WINDOW POST PROOFS +THIS WILL RESULT IN FAULTS AND PENALTIES IF NO OTHER MECHANISM IS RUNNING +TO SUBMIT WINDOW POST PROOFS. +Note: This option is entirely disabling the window post scheduler, +not just the builtin PoSt computation like Proving.DisableBuiltinWindowPoSt. +This option will stop lotus-miner from performing any actions related +to window post, including scheduling, submitting proofs, and recovering +sectors.`, + }, + { + Name: "DisableWinningPoSt", + Type: "bool", + + Comment: `When winning post is disabled, the miner process will NOT attempt to mine +blocks. This should only be set when there's an external process mining +blocks on behalf of the miner. 
+When disabled and no external block producers are configured, all potential +block rewards will be missed!`, + }, }, - "ProvingConfig": []DocField{ + "ProviderSubsystemsConfig": { + { + Name: "EnableWindowPost", + Type: "bool", + + Comment: ``, + }, + { + Name: "WindowPostMaxTasks", + Type: "int", + + Comment: ``, + }, + { + Name: "EnableWinningPost", + Type: "bool", + + Comment: ``, + }, + { + Name: "WinningPostMaxTasks", + Type: "int", + + Comment: ``, + }, + }, + "ProvingConfig": { { Name: "ParallelCheckLimit", Type: "int", @@ -892,7 +1138,7 @@ Note that setting this value lower may result in less efficient gas use - more m to prove each deadline, resulting in more total gas use (but each message will have lower gas limit)`, }, }, - "Pubsub": []DocField{ + "Pubsub": { { Name: "Bootstrapper", Type: "bool", @@ -952,7 +1198,7 @@ This property is used only if ElasticSearchTracer propery is set.`, Comment: `Auth token that will be passed with logs to elasticsearch - used for weighted peers score.`, }, }, - "RetrievalPricing": []DocField{ + "RetrievalPricing": { { Name: "Strategy", Type: "string", @@ -972,7 +1218,7 @@ This property is used only if ElasticSearchTracer propery is set.`, Comment: ``, }, }, - "RetrievalPricingDefault": []DocField{ + "RetrievalPricingDefault": { { Name: "VerifiedDealsFreeTransfer", Type: "bool", @@ -983,7 +1229,7 @@ This parameter is ONLY applicable if the retrieval pricing policy strategy has b default value is true`, }, }, - "RetrievalPricingExternal": []DocField{ + "RetrievalPricingExternal": { { Name: "Path", Type: "string", @@ -992,7 +1238,7 @@ default value is true`, This parameter is ONLY applicable if the retrieval pricing policy strategy has been configured to "external".`, }, }, - "SealerConfig": []DocField{ + "SealerConfig": { { Name: "ParallelFetchLimit", Type: "int", @@ -1093,7 +1339,7 @@ to use when evaluating tasks against this worker. An empty value defaults to "hardware".`, }, }, - "SealingConfig": []DocField{ + "SealingConfig": { { Name: "MaxWaitDealsSectors", Type: "uint64", @@ -1305,7 +1551,7 @@ Submitting a smaller number of prove commits per epoch would reduce the possibil Comment: `UseSyntheticPoRep, when set to true, will reduce the amount of cache data held on disk after the completion of PreCommit 2 to 11GiB.`, }, }, - "Splitstore": []DocField{ + "Splitstore": { { Name: "ColdStoreType", Type: "string", @@ -1372,7 +1618,7 @@ is set. 
Moving GC will not occur when total moving size exceeds HotstoreMaxSpaceTarget - HotstoreMaxSpaceSafetyBuffer`, }, }, - "StorageMiner": []DocField{ + "StorageMiner": { { Name: "Subsystems", Type: "MinerSubsystemConfig", @@ -1425,10 +1671,16 @@ HotstoreMaxSpaceTarget - HotstoreMaxSpaceSafetyBuffer`, Name: "DAGStore", Type: "DAGStoreConfig", + Comment: ``, + }, + { + Name: "HarmonyDB", + Type: "HarmonyDB", + Comment: ``, }, }, - "UserRaftConfig": []DocField{ + "UserRaftConfig": { { Name: "ClusterModeEnabled", Type: "bool", @@ -1490,7 +1742,7 @@ copies that we keep as backups (renaming) after cleanup.`, Comment: `Tracing enables propagation of contexts across binary boundaries.`, }, }, - "Wallet": []DocField{ + "Wallet": { { Name: "RemoteBackend", Type: "string", diff --git a/node/config/load.go b/node/config/load.go index 913350912..fd015d533 100644 --- a/node/config/load.go +++ b/node/config/load.go @@ -124,6 +124,7 @@ func ValidateSplitstoreSet(cfgRaw string) error { type cfgUpdateOpts struct { comment bool keepUncommented func(string) bool + noEnv bool } // UpdateCfgOpt is a functional option for updating the config @@ -149,6 +150,13 @@ func DefaultKeepUncommented() UpdateCfgOpt { return KeepUncommented(MatchEnableSplitstoreField) } +func NoEnv() UpdateCfgOpt { + return func(opts *cfgUpdateOpts) error { + opts.noEnv = true + return nil + } +} + // ConfigUpdate takes in a config and a default config and optionally comments out default values func ConfigUpdate(cfgCur, cfgDef interface{}, opts ...UpdateCfgOpt) ([]byte, error) { var updateOpts cfgUpdateOpts @@ -236,7 +244,9 @@ func ConfigUpdate(cfgCur, cfgDef interface{}, opts ...UpdateCfgOpt) ([]byte, err outLines = append(outLines, pad+"# type: "+doc.Type) } - outLines = append(outLines, pad+"# env var: LOTUS_"+strings.ToUpper(strings.ReplaceAll(section, ".", "_"))+"_"+strings.ToUpper(lf[0])) + if !updateOpts.noEnv { + outLines = append(outLines, pad+"# env var: LOTUS_"+strings.ToUpper(strings.ReplaceAll(section, ".", "_"))+"_"+strings.ToUpper(lf[0])) + } } } diff --git a/node/config/storage.go b/node/config/storage.go index dfe067840..ac5d57de8 100644 --- a/node/config/storage.go +++ b/node/config/storage.go @@ -2,8 +2,11 @@ package config import ( "encoding/json" + "errors" "io" + "io/fs" "os" + "path" "golang.org/x/xerrors" @@ -36,14 +39,31 @@ func StorageFromReader(reader io.Reader) (*storiface.StorageConfig, error) { return &cfg, nil } -func WriteStorageFile(path string, config storiface.StorageConfig) error { +func WriteStorageFile(filePath string, config storiface.StorageConfig) error { b, err := json.MarshalIndent(config, "", " ") if err != nil { return xerrors.Errorf("marshaling storage config: %w", err) } - if err := os.WriteFile(path, b, 0644); err != nil { - return xerrors.Errorf("persisting storage config (%s): %w", path, err) + info, err := os.Stat(filePath) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + return xerrors.Errorf("statting storage config (%s): %w", filePath, err) + } + if path.Base(filePath) == "." { + filePath = path.Join(filePath, "storage.json") + } + } else { + if info.IsDir() || path.Base(filePath) == "." 
{ + filePath = path.Join(filePath, "storage.json") + } + } + + if err := os.MkdirAll(path.Dir(filePath), 0755); err != nil { + return xerrors.Errorf("making storage config parent directory: %w", err) + } + if err := os.WriteFile(filePath, b, 0644); err != nil { + return xerrors.Errorf("persisting storage config (%s): %w", filePath, err) } return nil diff --git a/node/config/types.go b/node/config/types.go index 013e2db1a..2152e0795 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -62,6 +62,40 @@ type StorageMiner struct { Fees MinerFeeConfig Addresses MinerAddressConfig DAGStore DAGStoreConfig + + HarmonyDB HarmonyDB +} + +type LotusProviderConfig struct { + Subsystems ProviderSubsystemsConfig + + Fees LotusProviderFees + Addresses LotusProviderAddresses + Proving ProvingConfig + Journal JournalConfig + Apis ApisConfig +} + +type ApisConfig struct { + // ChainApiInfo is the API endpoint for the Lotus daemon. + ChainApiInfo []string + + // RPC Secret for the storage subsystem. + // If integrating with lotus-miner this must match the value from + // cat ~/.lotusminer/keystore/MF2XI2BNNJ3XILLQOJUXMYLUMU | jq -r .PrivateKey + StorageRPCSecret string +} + +type JournalConfig struct { + //Events of the form: "system1:event1,system1:event2[,...]" + DisabledEvents string +} + +type ProviderSubsystemsConfig struct { + EnableWindowPost bool + WindowPostMaxTasks int + EnableWinningPost bool + WinningPostMaxTasks int } type DAGStoreConfig struct { @@ -109,8 +143,32 @@ type MinerSubsystemConfig struct { EnableSectorStorage bool EnableMarkets bool + // When enabled, the sector index will reside in an external database + // as opposed to the local KV store in the miner process + // This is useful to allow workers to bypass the lotus miner to access sector information + EnableSectorIndexDB bool + SealerApiInfo string // if EnableSealing == false SectorIndexApiInfo string // if EnableSectorStorage == false + + // When window post is enabled, the miner will automatically submit window post proofs + // for all sectors that are eligible for window post + // IF WINDOW POST IS DISABLED, THE MINER WILL NOT SUBMIT WINDOW POST PROOFS + // THIS WILL RESULT IN FAULTS AND PENALTIES IF NO OTHER MECHANISM IS RUNNING + // TO SUBMIT WINDOW POST PROOFS. + // Note: This option is entirely disabling the window post scheduler, + // not just the builtin PoSt computation like Proving.DisableBuiltinWindowPoSt. + // This option will stop lotus-miner from performing any actions related + // to window post, including scheduling, submitting proofs, and recovering + // sectors. + DisableWindowPoSt bool + + // When winning post is disabled, the miner process will NOT attempt to mine + // blocks. This should only be set when there's an external process mining + // blocks on behalf of the miner. + // When disabled and no external block producers are configured, all potential + // block rewards will be missed! + DisableWinningPoSt bool } type DealmakingConfig struct { @@ -494,6 +552,20 @@ type MinerFeeConfig struct { MaximizeWindowPoStFeeCap bool } +type LotusProviderFees struct { + DefaultMaxFee types.FIL + MaxPreCommitGasFee types.FIL + MaxCommitGasFee types.FIL + + // maxBatchFee = maxBase + maxPerSector * nSectors + MaxPreCommitBatchGasFee BatchFeeConfig + MaxCommitBatchGasFee BatchFeeConfig + + MaxTerminateGasFee types.FIL + // WindowPoSt is a high-value operation, so the default fee should be high. 
+ MaxWindowPoStGasFee types.FIL + MaxPublishDealsFee types.FIL +} type MinerAddressConfig struct { // Addresses to send PreCommit messages from PreCommitControl []string @@ -512,6 +584,26 @@ type MinerAddressConfig struct { DisableWorkerFallback bool } +type LotusProviderAddresses struct { + // Addresses to send PreCommit messages from + PreCommitControl []string + // Addresses to send Commit messages from + CommitControl []string + TerminateControl []string + + // DisableOwnerFallback disables usage of the owner address for messages + // sent automatically + DisableOwnerFallback bool + // DisableWorkerFallback disables usage of the worker address for messages + // sent automatically, if control addresses are configured. + // A control address that doesn't have enough funds will still be chosen + // over the worker address if this flag is set. + DisableWorkerFallback bool + + // MinerAddresses are the addresses of the miner actors to use for sending messages + MinerAddresses []string +} + // API contains configs for API endpoint type API struct { // Binding address for the Lotus API @@ -735,6 +827,23 @@ type IndexConfig struct { EnableMsgIndex bool } +type HarmonyDB struct { + // HOSTS is a list of hostnames to nodes running YugabyteDB + // in a cluster. Only 1 is required + Hosts []string + + // The Yugabyte server's username with full credentials to operate on Lotus' Database. Blank for default. + Username string + + // The password for the related username. Blank for default. + Password string + + // The database (logical partition) within Yugabyte. Blank for default. + Database string + + // The port to find Yugabyte. Blank for default. + Port string +} type FaultReporterConfig struct { // EnableConsensusFaultReporter controls whether the node will monitor and // report consensus faults. 
When enabled, the node will watch for malicious diff --git a/node/impl/storminer.go b/node/impl/storminer.go index 25da71ef0..2ce42c327 100644 --- a/node/impl/storminer.go +++ b/node/impl/storminer.go @@ -45,6 +45,7 @@ import ( lminer "github.com/filecoin-project/lotus/chain/actors/builtin/miner" "github.com/filecoin-project/lotus/chain/gen" "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" mktsdagstore "github.com/filecoin-project/lotus/markets/dagstore" "github.com/filecoin-project/lotus/markets/storageadapter" "github.com/filecoin-project/lotus/miner" @@ -122,6 +123,8 @@ type StorageMinerAPI struct { GetSealingConfigFunc dtypes.GetSealingConfigFunc `optional:"true"` GetExpectedSealDurationFunc dtypes.GetExpectedSealDurationFunc `optional:"true"` SetExpectedSealDurationFunc dtypes.SetExpectedSealDurationFunc `optional:"true"` + + HarmonyDB *harmonydb.DB `optional:"true"` } var _ api.StorageMiner = &StorageMinerAPI{} diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index 12879cd64..0680029bf 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -311,12 +311,11 @@ func WindowPostScheduler(fc config.MinerFeeConfig, pc config.ProvingConfig) func verif = params.Verifier j = params.Journal as = params.AddrSel - maddr = address.Address(params.Maddr) ) ctx := helpers.LifecycleCtx(mctx, lc) - fps, err := wdpost.NewWindowedPoStScheduler(api, fc, pc, as, sealer, verif, sealer, j, maddr) + fps, err := wdpost.NewWindowedPoStScheduler(api, fc, pc, as, sealer, verif, sealer, j, []dtypes.MinerAddress{params.Maddr}) if err != nil { return nil, err diff --git a/node/repo/fsrepo.go b/node/repo/fsrepo.go index 03ddd2d6c..d8e41fb2b 100644 --- a/node/repo/fsrepo.go +++ b/node/repo/fsrepo.go @@ -185,6 +185,30 @@ func (worker) APIInfoEnvVars() (primary string, fallbacks []string, deprecated [ return "WORKER_API_INFO", nil, nil } +type provider struct{} + +var Provider provider + +func (provider) Type() string { + return "Provider" +} + +func (provider) Config() interface{} { + return &struct{}{} +} + +func (provider) APIFlags() []string { + return []string{"provider-api-url"} +} + +func (provider) RepoFlags() []string { + return []string{"provider-repo"} +} + +func (provider) APIInfoEnvVars() (primary string, fallbacks []string, deprecated []string) { + return "PROVIDER_API_INFO", nil, nil +} + var Wallet wallet type wallet struct { @@ -322,7 +346,7 @@ func (fsr *FsRepo) APIEndpoint() (multiaddr.Multiaddr, error) { f, err := os.Open(p) if os.IsNotExist(err) { - return nil, ErrNoAPIEndpoint + return nil, xerrors.Errorf("No file (%s): %w", p, ErrNoAPIEndpoint) } else if err != nil { return nil, err } diff --git a/node/repo/repo_test.go b/node/repo/repo_test.go index 16c101d44..c78afa9db 100644 --- a/node/repo/repo_test.go +++ b/node/repo/repo_test.go @@ -16,7 +16,7 @@ import ( func basicTest(t *testing.T, repo Repo) { apima, err := repo.APIEndpoint() if assert.Error(t, err) { - assert.Equal(t, ErrNoAPIEndpoint, err) + assert.ErrorContains(t, err, ErrNoAPIEndpoint.Error()) } assert.Nil(t, apima, "with no api endpoint, return should be nil") @@ -72,7 +72,7 @@ func basicTest(t *testing.T, repo Repo) { apima, err = repo.APIEndpoint() if assert.Error(t, err) { - assert.Equal(t, ErrNoAPIEndpoint, err, "after closing repo, api should be nil") + assert.ErrorContains(t, err, ErrNoAPIEndpoint.Error(), "after closing repo, api should be nil") } assert.Nil(t, apima, "with closed repo, apima should be set back to nil") 
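APIEndpoint now wraps ErrNoAPIEndpoint (and names the missing file) instead of returning the bare sentinel, which is why the tests above switched from assert.Equal to assert.ErrorContains. A minimal caller-side sketch, assuming the caller already holds a node/repo Repo; the package and function names here are illustrative only:

    package example

    import (
        "errors"
        "fmt"

        lotusrepo "github.com/filecoin-project/lotus/node/repo"
    )

    // checkEndpoint shows the sentinel check that still works with the wrapped
    // error: use errors.Is rather than a direct equality comparison.
    func checkEndpoint(r lotusrepo.Repo) {
        ma, err := r.APIEndpoint()
        if errors.Is(err, lotusrepo.ErrNoAPIEndpoint) {
            fmt.Println("no API endpoint recorded yet:", err) // message now names the missing file
            return
        }
        if err != nil {
            fmt.Println("reading API endpoint:", err)
            return
        }
        fmt.Println("API endpoint:", ma)
    }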
diff --git a/provider/address.go b/provider/address.go new file mode 100644 index 000000000..f69ca3fac --- /dev/null +++ b/provider/address.go @@ -0,0 +1,51 @@ +package provider + +import ( + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + + "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/storage/ctladdr" +) + +func AddressSelector(addrConf *config.LotusProviderAddresses) func() (*ctladdr.AddressSelector, error) { + return func() (*ctladdr.AddressSelector, error) { + as := &ctladdr.AddressSelector{} + if addrConf == nil { + return as, nil + } + + as.DisableOwnerFallback = addrConf.DisableOwnerFallback + as.DisableWorkerFallback = addrConf.DisableWorkerFallback + + for _, s := range addrConf.PreCommitControl { + addr, err := address.NewFromString(s) + if err != nil { + return nil, xerrors.Errorf("parsing precommit control address: %w", err) + } + + as.PreCommitControl = append(as.PreCommitControl, addr) + } + + for _, s := range addrConf.CommitControl { + addr, err := address.NewFromString(s) + if err != nil { + return nil, xerrors.Errorf("parsing commit control address: %w", err) + } + + as.CommitControl = append(as.CommitControl, addr) + } + + for _, s := range addrConf.TerminateControl { + addr, err := address.NewFromString(s) + if err != nil { + return nil, xerrors.Errorf("parsing terminate control address: %w", err) + } + + as.TerminateControl = append(as.TerminateControl, addr) + } + + return as, nil + } +} diff --git a/provider/builder.go b/provider/builder.go new file mode 100644 index 000000000..81a1a7a0a --- /dev/null +++ b/provider/builder.go @@ -0,0 +1,50 @@ +package provider + +import ( + "context" + "time" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/node/config" + dtypes "github.com/filecoin-project/lotus/node/modules/dtypes" + "github.com/filecoin-project/lotus/provider/chainsched" + "github.com/filecoin-project/lotus/provider/lpmessage" + "github.com/filecoin-project/lotus/provider/lpwindow" + "github.com/filecoin-project/lotus/storage/ctladdr" + "github.com/filecoin-project/lotus/storage/paths" + "github.com/filecoin-project/lotus/storage/sealer" + "github.com/filecoin-project/lotus/storage/sealer/storiface" +) + +//var log = logging.Logger("provider") + +func WindowPostScheduler(ctx context.Context, fc config.LotusProviderFees, pc config.ProvingConfig, + api api.FullNode, verif storiface.Verifier, lw *sealer.LocalWorker, sender *lpmessage.Sender, + as *ctladdr.AddressSelector, addresses []dtypes.MinerAddress, db *harmonydb.DB, + stor paths.Store, idx paths.SectorIndex, max int) (*lpwindow.WdPostTask, *lpwindow.WdPostSubmitTask, *lpwindow.WdPostRecoverDeclareTask, error) { + + chainSched := chainsched.New(api) + + // todo config + ft := lpwindow.NewSimpleFaultTracker(stor, idx, 32, 5*time.Second, 300*time.Second) + + computeTask, err := lpwindow.NewWdPostTask(db, api, ft, lw, verif, chainSched, addresses, max) + if err != nil { + return nil, nil, nil, err + } + + submitTask, err := lpwindow.NewWdPostSubmitTask(chainSched, sender, db, api, fc.MaxWindowPoStGasFee, as) + if err != nil { + return nil, nil, nil, err + } + + recoverTask, err := lpwindow.NewWdPostRecoverDeclareTask(sender, db, api, ft, as, chainSched, fc.MaxWindowPoStGasFee, addresses) + if err != nil { + return nil, nil, nil, err + } + + go chainSched.Run(ctx) + + return computeTask, submitTask, recoverTask, nil +} diff --git 
a/provider/chainsched/chain_sched.go b/provider/chainsched/chain_sched.go new file mode 100644 index 000000000..559a0274f --- /dev/null +++ b/provider/chainsched/chain_sched.go @@ -0,0 +1,136 @@ +package chainsched + +import ( + "context" + "time" + + logging "github.com/ipfs/go-log/v2" + "go.opencensus.io/trace" + "golang.org/x/xerrors" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/build" + "github.com/filecoin-project/lotus/chain/store" + "github.com/filecoin-project/lotus/chain/types" +) + +var log = logging.Logger("chainsched") + +type NodeAPI interface { + ChainHead(context.Context) (*types.TipSet, error) + ChainNotify(context.Context) (<-chan []*api.HeadChange, error) +} + +type ProviderChainSched struct { + api NodeAPI + + callbacks []UpdateFunc + started bool +} + +func New(api NodeAPI) *ProviderChainSched { + return &ProviderChainSched{ + api: api, + } +} + +type UpdateFunc func(ctx context.Context, revert, apply *types.TipSet) error + +func (s *ProviderChainSched) AddHandler(ch UpdateFunc) error { + if s.started { + return xerrors.Errorf("cannot add handler after start") + } + + s.callbacks = append(s.callbacks, ch) + return nil +} + +func (s *ProviderChainSched) Run(ctx context.Context) { + s.started = true + + var ( + notifs <-chan []*api.HeadChange + err error + gotCur bool + ) + + // not fine to panic after this point + for { + if notifs == nil { + notifs, err = s.api.ChainNotify(ctx) + if err != nil { + log.Errorf("ChainNotify error: %+v", err) + + build.Clock.Sleep(10 * time.Second) + continue + } + + gotCur = false + log.Info("restarting window post scheduler") + } + + select { + case changes, ok := <-notifs: + if !ok { + log.Warn("window post scheduler notifs channel closed") + notifs = nil + continue + } + + if !gotCur { + if len(changes) != 1 { + log.Errorf("expected first notif to have len = 1") + continue + } + chg := changes[0] + if chg.Type != store.HCCurrent { + log.Errorf("expected first notif to tell current ts") + continue + } + + ctx, span := trace.StartSpan(ctx, "ProviderChainSched.headChange") + + s.update(ctx, nil, chg.Val) + + span.End() + gotCur = true + continue + } + + ctx, span := trace.StartSpan(ctx, "ProviderChainSched.headChange") + + var lowest, highest *types.TipSet = nil, nil + + for _, change := range changes { + if change.Val == nil { + log.Errorf("change.Val was nil") + } + switch change.Type { + case store.HCRevert: + lowest = change.Val + case store.HCApply: + highest = change.Val + } + } + + s.update(ctx, lowest, highest) + + span.End() + case <-ctx.Done(): + return + } + } +} + +func (s *ProviderChainSched) update(ctx context.Context, revert, apply *types.TipSet) { + if apply == nil { + log.Error("no new tipset in window post ProviderChainSched.update") + return + } + + for _, ch := range s.callbacks { + if err := ch(ctx, revert, apply); err != nil { + log.Errorf("handling head updates in provider chain sched: %+v", err) + } + } +} diff --git a/provider/lpmessage/sender.go b/provider/lpmessage/sender.go new file mode 100644 index 000000000..8d6cd4027 --- /dev/null +++ b/provider/lpmessage/sender.go @@ -0,0 +1,371 @@ +package lpmessage + +import ( + "bytes" + "context" + "time" + + "github.com/google/uuid" + "github.com/ipfs/go-cid" + logging "github.com/ipfs/go-log/v2" + "go.uber.org/multierr" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-state-types/big" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/chain/types" + 
"github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/lib/promise" +) + +var log = logging.Logger("lpmessage") + +var SendLockedWait = 100 * time.Millisecond + +type SenderAPI interface { + StateAccountKey(ctx context.Context, addr address.Address, tsk types.TipSetKey) (address.Address, error) + GasEstimateMessageGas(ctx context.Context, msg *types.Message, spec *api.MessageSendSpec, tsk types.TipSetKey) (*types.Message, error) + WalletBalance(ctx context.Context, addr address.Address) (big.Int, error) + MpoolGetNonce(context.Context, address.Address) (uint64, error) + MpoolPush(context.Context, *types.SignedMessage) (cid.Cid, error) +} + +type SignerAPI interface { + WalletSignMessage(context.Context, address.Address, *types.Message) (*types.SignedMessage, error) +} + +// Sender abstracts away highly-available message sending with coordination through +// HarmonyDB. It make sure that nonces are assigned transactionally, and that +// messages are correctly broadcasted to the network. It ensures that messages +// are sent serially, and that failures to send don't cause nonce gaps. +type Sender struct { + api SenderAPI + + sendTask *SendTask + + db *harmonydb.DB +} + +type SendTask struct { + sendTF promise.Promise[harmonytask.AddTaskFunc] + + api SenderAPI + signer SignerAPI + + db *harmonydb.DB +} + +func (s *SendTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + ctx := context.TODO() + + // get message from db + + var dbMsg struct { + FromKey string `db:"from_key"` + ToAddr string `db:"to_addr"` + + UnsignedData []byte `db:"unsigned_data"` + UnsignedCid string `db:"unsigned_cid"` + + // may not be null if we have somehow already signed but failed to send this message + Nonce *uint64 `db:"nonce"` + SignedData []byte `db:"signed_data"` + } + + err = s.db.QueryRow(ctx, `select from_key, nonce, to_addr, unsigned_data, unsigned_cid from message_sends where id = $1`, taskID).Scan(&dbMsg) + if err != nil { + return false, xerrors.Errorf("getting message from db: %w", err) + } + + // deserialize the message + var msg types.Message + err = msg.UnmarshalCBOR(bytes.NewReader(dbMsg.UnsignedData)) + if err != nil { + return false, xerrors.Errorf("unmarshaling unsigned db message: %w", err) + } + + // get db send lock + for { + // check if we still own the task + if !stillOwned() { + return false, xerrors.Errorf("lost ownership of task") + } + + // try to acquire lock + cn, err := s.db.Exec(ctx, `INSERT INTO message_send_locks (from_key, task_id, claimed_at) VALUES ($1, $2, CURRENT_TIMESTAMP) + ON CONFLICT (from_key) DO UPDATE SET task_id = EXCLUDED.task_id, claimed_at = CURRENT_TIMESTAMP WHERE message_send_locks.task_id = $2;`, dbMsg.FromKey, taskID) + if err != nil { + return false, xerrors.Errorf("acquiring send lock: %w", err) + } + + if cn == 1 { + // we got the lock + break + } + + // we didn't get the lock, wait a bit and try again + log.Infow("waiting for send lock", "task_id", taskID, "from", dbMsg.FromKey) + time.Sleep(SendLockedWait) + } + + // defer release db send lock + defer func() { + _, err2 := s.db.Exec(ctx, `delete from message_send_locks where from_key = $1 and task_id = $2`, dbMsg.FromKey, taskID) + if err2 != nil { + log.Errorw("releasing send lock", "task_id", taskID, "from", dbMsg.FromKey, "error", err2) + + // make sure harmony retries this task so that we eventually release this 
lock + done = false + err = multierr.Append(err, xerrors.Errorf("releasing send lock: %w", err2)) + } + }() + + // assign nonce IF NOT ASSIGNED (max(api.MpoolGetNonce, db nonce+1)) + var sigMsg *types.SignedMessage + + if dbMsg.Nonce == nil { + msgNonce, err := s.api.MpoolGetNonce(ctx, msg.From) + if err != nil { + return false, xerrors.Errorf("getting nonce from mpool: %w", err) + } + + // get nonce from db + var dbNonce *uint64 + r := s.db.QueryRow(ctx, `select max(nonce) from message_sends where from_key = $1 and send_success = true`, msg.From.String()) + if err := r.Scan(&dbNonce); err != nil { + return false, xerrors.Errorf("getting nonce from db: %w", err) + } + + if dbNonce != nil && *dbNonce+1 > msgNonce { + msgNonce = *dbNonce + 1 + } + + msg.Nonce = msgNonce + + // sign message + sigMsg, err = s.signer.WalletSignMessage(ctx, msg.From, &msg) + if err != nil { + return false, xerrors.Errorf("signing message: %w", err) + } + + data, err := sigMsg.Serialize() + if err != nil { + return false, xerrors.Errorf("serializing message: %w", err) + } + + jsonBytes, err := sigMsg.MarshalJSON() + if err != nil { + return false, xerrors.Errorf("marshaling message: %w", err) + } + + // write to db + + n, err := s.db.Exec(ctx, `update message_sends set nonce = $1, signed_data = $2, signed_json = $3, signed_cid = $4 where send_task_id = $5`, + msg.Nonce, data, string(jsonBytes), sigMsg.Cid().String(), taskID) + if err != nil { + return false, xerrors.Errorf("updating db record: %w", err) + } + if n != 1 { + log.Errorw("updating db record: expected 1 row to be affected, got %d", n) + return false, xerrors.Errorf("updating db record: expected 1 row to be affected, got %d", n) + } + } else { + // Note: this handles an unlikely edge-case: + // We have previously signed the message but either failed to send it or failed to update the db + // note that when that happens the likely cause is the provider process losing its db connection + // or getting killed before it can update the db. In that case the message lock will still be held + // so it will be safe to rebroadcast the signed message + + // deserialize the signed message + sigMsg = new(types.SignedMessage) + err = sigMsg.UnmarshalCBOR(bytes.NewReader(dbMsg.SignedData)) + if err != nil { + return false, xerrors.Errorf("unmarshaling signed db message: %w", err) + } + } + + // send! 
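+	// The outcome of the push is persisted below regardless of whether MpoolPush
+	// succeeds; the task then completes, and Send() (which polls message_sends)
+	// reports a broadcast failure to the caller via send_error rather than relying
+	// on a harmony task retry.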
+ _, err = s.api.MpoolPush(ctx, sigMsg) + + // persist send result + var sendSuccess = err == nil + var sendError string + if err != nil { + sendError = err.Error() + } + + _, err = s.db.Exec(ctx, `update message_sends set send_success = $1, send_error = $2, send_time = CURRENT_TIMESTAMP where send_task_id = $3`, sendSuccess, sendError, taskID) + if err != nil { + return false, xerrors.Errorf("updating db record: %w", err) + } + + return true, nil +} + +func (s *SendTask) CanAccept(ids []harmonytask.TaskID, engine *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + if len(ids) == 0 { + // probably can't happen, but panicking is bad + return nil, nil + } + + if s.signer == nil { + // can't sign messages here + return nil, nil + } + + return &ids[0], nil +} + +func (s *SendTask) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Max: 1024, + Name: "SendMessage", + Cost: resources.Resources{ + Cpu: 0, + Gpu: 0, + Ram: 1 << 20, + }, + MaxFailures: 1000, + Follows: nil, + } +} + +func (s *SendTask) Adder(taskFunc harmonytask.AddTaskFunc) { + s.sendTF.Set(taskFunc) +} + +var _ harmonytask.TaskInterface = &SendTask{} + +// NewSender creates a new Sender. +func NewSender(api SenderAPI, signer SignerAPI, db *harmonydb.DB) (*Sender, *SendTask) { + st := &SendTask{ + api: api, + signer: signer, + db: db, + } + + return &Sender{ + api: api, + db: db, + + sendTask: st, + }, st +} + +// Send atomically assigns a nonce, signs, and pushes a message +// to mempool. +// maxFee is only used when GasFeeCap/GasPremium fields aren't specified +// +// When maxFee is set to 0, Send will guess appropriate fee +// based on current chain conditions +// +// Send behaves much like fullnodeApi.MpoolPushMessage, but it coordinates +// through HarmonyDB, making it safe to broadcast messages from multiple independent +// API nodes +// +// Send is also currently more strict about required parameters than MpoolPushMessage +func (s *Sender) Send(ctx context.Context, msg *types.Message, mss *api.MessageSendSpec, reason string) (cid.Cid, error) { + if mss == nil { + return cid.Undef, xerrors.Errorf("MessageSendSpec cannot be nil") + } + if (mss.MsgUuid != uuid.UUID{}) { + return cid.Undef, xerrors.Errorf("MessageSendSpec.MsgUuid must be zero") + } + + fromA, err := s.api.StateAccountKey(ctx, msg.From, types.EmptyTSK) + if err != nil { + return cid.Undef, xerrors.Errorf("getting key address: %w", err) + } + + msg.From = fromA + + if msg.Nonce != 0 { + return cid.Undef, xerrors.Errorf("Send expects message nonce to be 0, was %d", msg.Nonce) + } + + msg, err = s.api.GasEstimateMessageGas(ctx, msg, mss, types.EmptyTSK) + if err != nil { + return cid.Undef, xerrors.Errorf("GasEstimateMessageGas error: %w", err) + } + + b, err := s.api.WalletBalance(ctx, msg.From) + if err != nil { + return cid.Undef, xerrors.Errorf("mpool push: getting origin balance: %w", err) + } + + requiredFunds := big.Add(msg.Value, msg.RequiredFunds()) + if b.LessThan(requiredFunds) { + return cid.Undef, xerrors.Errorf("mpool push: not enough funds: %s < %s", b, requiredFunds) + } + + // push the task + taskAdder := s.sendTask.sendTF.Val(ctx) + + unsBytes := new(bytes.Buffer) + err = msg.MarshalCBOR(unsBytes) + if err != nil { + return cid.Undef, xerrors.Errorf("marshaling message: %w", err) + } + + taskAdder(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) { + _, err := tx.Exec(`insert into message_sends (from_key, to_addr, send_reason, unsigned_data, unsigned_cid, send_task_id) 
values ($1, $2, $3, $4, $5, $6)`, + msg.From.String(), msg.To.String(), reason, unsBytes.Bytes(), msg.Cid().String(), id) + if err != nil { + return false, xerrors.Errorf("inserting message into db: %w", err) + } + + return true, nil + }) + + // wait for exec + var ( + pollInterval = 50 * time.Millisecond + pollIntervalMul = 2 + maxPollInterval = 5 * time.Second + pollLoops = 0 + + sigCid cid.Cid + sendErr error + ) + + for { + var err error + var sigCidStr, sendError string + var sendSuccess *bool + + err = s.db.QueryRow(ctx, `select signed_cid, send_success, send_error from message_sends where send_task_id = $1`, taskAdder).Scan(&sigCidStr, &sendSuccess, &sendError) + if err != nil { + return cid.Undef, xerrors.Errorf("getting cid for task: %w", err) + } + + if sendSuccess == nil { + time.Sleep(pollInterval) + pollLoops++ + pollInterval *= time.Duration(pollIntervalMul) + if pollInterval > maxPollInterval { + pollInterval = maxPollInterval + } + + continue + } + + if !*sendSuccess { + sendErr = xerrors.Errorf("send error: %s", sendError) + } else { + sigCid, err = cid.Parse(sigCidStr) + if err != nil { + return cid.Undef, xerrors.Errorf("parsing signed cid: %w", err) + } + } + + break + } + + log.Infow("sent message", "cid", sigCid, "task_id", taskAdder, "send_error", sendErr, "poll_loops", pollLoops) + + return sigCid, sendErr +} diff --git a/provider/lpwindow/compute_do.go b/provider/lpwindow/compute_do.go new file mode 100644 index 000000000..7089ceb02 --- /dev/null +++ b/provider/lpwindow/compute_do.go @@ -0,0 +1,442 @@ +package lpwindow + +import ( + "bytes" + "context" + "sort" + "sync" + "time" + + "github.com/ipfs/go-cid" + "go.uber.org/multierr" + "golang.org/x/xerrors" + + ffi "github.com/filecoin-project/filecoin-ffi" + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-bitfield" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/builtin" + miner2 "github.com/filecoin-project/go-state-types/builtin/v9/miner" + "github.com/filecoin-project/go-state-types/crypto" + "github.com/filecoin-project/go-state-types/dline" + "github.com/filecoin-project/go-state-types/proof" + proof7 "github.com/filecoin-project/specs-actors/v7/actors/runtime/proof" + + "github.com/filecoin-project/lotus/build" + "github.com/filecoin-project/lotus/chain/actors/builtin/miner" + types "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/storage/sealer" + "github.com/filecoin-project/lotus/storage/sealer/storiface" +) + +const disablePreChecks = false // todo config + +func (t *WdPostTask) DoPartition(ctx context.Context, ts *types.TipSet, maddr address.Address, di *dline.Info, partIdx uint64) (out *miner2.SubmitWindowedPoStParams, err error) { + defer func() { + if r := recover(); r != nil { + log.Errorf("recover: %s", r) + err = xerrors.Errorf("panic in doPartition: %s", r) + } + }() + + buf := new(bytes.Buffer) + if err := maddr.MarshalCBOR(buf); err != nil { + return nil, xerrors.Errorf("failed to marshal address to cbor: %w", err) + } + + headTs, err := t.api.ChainHead(ctx) + if err != nil { + return nil, xerrors.Errorf("getting current head: %w", err) + } + + rand, err := t.api.StateGetRandomnessFromBeacon(ctx, crypto.DomainSeparationTag_WindowedPoStChallengeSeed, di.Challenge, buf.Bytes(), headTs.Key()) + if err != nil { + return nil, xerrors.Errorf("failed to get chain randomness from beacon for window post (ts=%d; deadline=%d): %w", ts.Height(), di, err) + } + + parts, err := 
t.api.StateMinerPartitions(ctx, maddr, di.Index, ts.Key()) + if err != nil { + return nil, xerrors.Errorf("getting partitions: %w", err) + } + + if partIdx >= uint64(len(parts)) { + return nil, xerrors.Errorf("invalid partIdx %d (deadline has %d partitions)", partIdx, len(parts)) + } + + partition := parts[partIdx] + + params := miner2.SubmitWindowedPoStParams{ + Deadline: di.Index, + Partitions: make([]miner2.PoStPartition, 0, 1), + Proofs: nil, + } + + var partitions []miner2.PoStPartition + var xsinfos []proof7.ExtendedSectorInfo + + { + toProve, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors) + if err != nil { + return nil, xerrors.Errorf("removing faults from set of sectors to prove: %w", err) + } + /*if manual { + // this is a check run, we want to prove faulty sectors, even + // if they are not declared as recovering. + toProve = partition.LiveSectors + }*/ + toProve, err = bitfield.MergeBitFields(toProve, partition.RecoveringSectors) + if err != nil { + return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err) + } + + good, err := toProve.Copy() + if err != nil { + return nil, xerrors.Errorf("copy toProve: %w", err) + } + if !disablePreChecks { + good, err = checkSectors(ctx, t.api, t.faultTracker, maddr, toProve, ts.Key()) + if err != nil { + return nil, xerrors.Errorf("checking sectors to skip: %w", err) + } + } + + /*good, err = bitfield.SubtractBitField(good, postSkipped) + if err != nil { + return nil, xerrors.Errorf("toProve - postSkipped: %w", err) + } + + post skipped is legacy retry mechanism, shouldn't be needed anymore + */ + + skipped, err := bitfield.SubtractBitField(toProve, good) + if err != nil { + return nil, xerrors.Errorf("toProve - good: %w", err) + } + + sc, err := skipped.Count() + if err != nil { + return nil, xerrors.Errorf("getting skipped sector count: %w", err) + } + + skipCount := sc + + ssi, err := t.sectorsForProof(ctx, maddr, good, partition.AllSectors, ts) + if err != nil { + return nil, xerrors.Errorf("getting sorted sector info: %w", err) + } + + if len(ssi) == 0 { + return nil, xerrors.Errorf("no sectors to prove") + } + + xsinfos = append(xsinfos, ssi...) + partitions = append(partitions, miner2.PoStPartition{ + Index: partIdx, + Skipped: skipped, + }) + + log.Infow("running window post", + "chain-random", rand, + "deadline", di, + "height", ts.Height(), + "skipped", skipCount) + + tsStart := build.Clock.Now() + + mid, err := address.IDFromAddress(maddr) + if err != nil { + return nil, err + } + + nv, err := t.api.StateNetworkVersion(ctx, ts.Key()) + if err != nil { + return nil, xerrors.Errorf("getting network version: %w", err) + } + + ppt, err := xsinfos[0].SealProof.RegisteredWindowPoStProofByNetworkVersion(nv) + if err != nil { + return nil, xerrors.Errorf("failed to get window post type: %w", err) + } + + postOut, ps, err := t.generateWindowPoSt(ctx, ppt, abi.ActorID(mid), xsinfos, append(abi.PoStRandomness{}, rand...)) + elapsed := time.Since(tsStart) + log.Infow("computing window post", "partition", partIdx, "elapsed", elapsed, "skip", len(ps), "err", err) + if err != nil { + log.Errorf("error generating window post: %s", err) + } + + if err == nil { + // If we proved nothing, something is very wrong. 
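+	// Beyond the emptiness check, the challenge randomness is re-fetched against the
+	// current head and must still match what was used for generation (a mismatch is an
+	// error here, not a retry as in the legacy wdpost loop), and VerifyWindowPoSt is
+	// run locally; its result is currently advisory only, with the failure handling
+	// left in the commented-out retry blocks.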
+ if len(postOut) == 0 { + log.Errorf("len(postOut) == 0") + return nil, xerrors.Errorf("received no proofs back from generate window post") + } + + headTs, err := t.api.ChainHead(ctx) + if err != nil { + return nil, xerrors.Errorf("getting current head: %w", err) + } + + checkRand, err := t.api.StateGetRandomnessFromBeacon(ctx, crypto.DomainSeparationTag_WindowedPoStChallengeSeed, di.Challenge, buf.Bytes(), headTs.Key()) + if err != nil { + return nil, xerrors.Errorf("failed to get chain randomness from beacon for window post (ts=%d; deadline=%d): %w", ts.Height(), di, err) + } + + if !bytes.Equal(checkRand, rand) { + // this is a check from legacy code, there it would retry with new randomness. + // here we don't retry because the current network version uses beacon randomness + // which should never change. We do keep this check tho to detect potential issues. + return nil, xerrors.Errorf("post generation randomness was different from random beacon") + } + + sinfos := make([]proof7.SectorInfo, len(xsinfos)) + for i, xsi := range xsinfos { + sinfos[i] = proof7.SectorInfo{ + SealProof: xsi.SealProof, + SectorNumber: xsi.SectorNumber, + SealedCID: xsi.SealedCID, + } + } + if correct, err := t.verifier.VerifyWindowPoSt(ctx, proof.WindowPoStVerifyInfo{ + Randomness: abi.PoStRandomness(checkRand), + Proofs: postOut, + ChallengedSectors: sinfos, + Prover: abi.ActorID(mid), + }); err != nil { + /*log.Errorw("window post verification failed", "post", postOut, "error", err) + time.Sleep(5 * time.Second) + continue todo retry loop */ + } else if !correct { + _ = correct + /*log.Errorw("generated incorrect window post proof", "post", postOut, "error", err) + continue todo retry loop*/ + } + + // Proof generation successful, stop retrying + //somethingToProve = true + params.Partitions = partitions + params.Proofs = postOut + //break + + return ¶ms, nil + } + } + + return nil, xerrors.Errorf("failed to generate window post") +} + +type CheckSectorsAPI interface { + StateMinerSectors(ctx context.Context, addr address.Address, bf *bitfield.BitField, tsk types.TipSetKey) ([]*miner.SectorOnChainInfo, error) +} + +func checkSectors(ctx context.Context, api CheckSectorsAPI, ft sealer.FaultTracker, + maddr address.Address, check bitfield.BitField, tsk types.TipSetKey) (bitfield.BitField, error) { + mid, err := address.IDFromAddress(maddr) + if err != nil { + return bitfield.BitField{}, xerrors.Errorf("failed to convert to ID addr: %w", err) + } + + sectorInfos, err := api.StateMinerSectors(ctx, maddr, &check, tsk) + if err != nil { + return bitfield.BitField{}, xerrors.Errorf("failed to get sector infos: %w", err) + } + + type checkSector struct { + sealed cid.Cid + update bool + } + + sectors := make(map[abi.SectorNumber]checkSector) + var tocheck []storiface.SectorRef + for _, info := range sectorInfos { + sectors[info.SectorNumber] = checkSector{ + sealed: info.SealedCID, + update: info.SectorKeyCID != nil, + } + tocheck = append(tocheck, storiface.SectorRef{ + ProofType: info.SealProof, + ID: abi.SectorID{ + Miner: abi.ActorID(mid), + Number: info.SectorNumber, + }, + }) + } + + if len(tocheck) == 0 { + return bitfield.BitField{}, nil + } + + pp, err := tocheck[0].ProofType.RegisteredWindowPoStProof() + if err != nil { + return bitfield.BitField{}, xerrors.Errorf("failed to get window PoSt proof: %w", err) + } + pp, err = pp.ToV1_1PostProof() + if err != nil { + return bitfield.BitField{}, xerrors.Errorf("failed to convert to v1_1 post proof: %w", err) + } + + bad, err := ft.CheckProvable(ctx, pp, 
tocheck, func(ctx context.Context, id abi.SectorID) (cid.Cid, bool, error) { + s, ok := sectors[id.Number] + if !ok { + return cid.Undef, false, xerrors.Errorf("sealed CID not found") + } + return s.sealed, s.update, nil + }) + if err != nil { + return bitfield.BitField{}, xerrors.Errorf("checking provable sectors: %w", err) + } + for id := range bad { + delete(sectors, id.Number) + } + + log.Warnw("Checked sectors", "checked", len(tocheck), "good", len(sectors)) + + sbf := bitfield.New() + for s := range sectors { + sbf.Set(uint64(s)) + } + + return sbf, nil +} + +func (t *WdPostTask) sectorsForProof(ctx context.Context, maddr address.Address, goodSectors, allSectors bitfield.BitField, ts *types.TipSet) ([]proof7.ExtendedSectorInfo, error) { + sset, err := t.api.StateMinerSectors(ctx, maddr, &goodSectors, ts.Key()) + if err != nil { + return nil, err + } + + if len(sset) == 0 { + return nil, nil + } + + sectorByID := make(map[uint64]proof7.ExtendedSectorInfo, len(sset)) + for _, sector := range sset { + sectorByID[uint64(sector.SectorNumber)] = proof7.ExtendedSectorInfo{ + SectorNumber: sector.SectorNumber, + SealedCID: sector.SealedCID, + SealProof: sector.SealProof, + SectorKey: sector.SectorKeyCID, + } + } + + proofSectors := make([]proof7.ExtendedSectorInfo, 0, len(sset)) + if err := allSectors.ForEach(func(sectorNo uint64) error { + if info, found := sectorByID[sectorNo]; found { + proofSectors = append(proofSectors, info) + } //else { + //skip + // todo: testing: old logic used to put 'substitute' sectors here + // that probably isn't needed post nv19, but we do need to check that + //} + return nil + }); err != nil { + return nil, xerrors.Errorf("iterating partition sector bitmap: %w", err) + } + + return proofSectors, nil +} + +func (t *WdPostTask) generateWindowPoSt(ctx context.Context, ppt abi.RegisteredPoStProof, minerID abi.ActorID, sectorInfo []proof.ExtendedSectorInfo, randomness abi.PoStRandomness) ([]proof.PoStProof, []abi.SectorID, error) { + var retErr error = nil + randomness[31] &= 0x3f + + out := make([]proof.PoStProof, 0) + + if len(sectorInfo) == 0 { + return nil, nil, xerrors.New("generate window post len(sectorInfo)=0") + } + + maxPartitionSize, err := builtin.PoStProofWindowPoStPartitionSectors(ppt) // todo proxy through chain/actors + if err != nil { + return nil, nil, xerrors.Errorf("get sectors count of partition failed:%+v", err) + } + + // The partitions number of this batch + // ceil(sectorInfos / maxPartitionSize) + partitionCount := uint64((len(sectorInfo) + int(maxPartitionSize) - 1) / int(maxPartitionSize)) + if partitionCount > 1 { + return nil, nil, xerrors.Errorf("generateWindowPoSt partitionCount:%d, only support 1", partitionCount) + } + + log.Infof("generateWindowPoSt maxPartitionSize:%d partitionCount:%d", maxPartitionSize, partitionCount) + + var skipped []abi.SectorID + var flk sync.Mutex + cctx, cancel := context.WithCancel(ctx) + defer cancel() + + sort.Slice(sectorInfo, func(i, j int) bool { + return sectorInfo[i].SectorNumber < sectorInfo[j].SectorNumber + }) + + sectorNums := make([]abi.SectorNumber, len(sectorInfo)) + sectorMap := make(map[abi.SectorNumber]proof.ExtendedSectorInfo) + for i, s := range sectorInfo { + sectorNums[i] = s.SectorNumber + sectorMap[s.SectorNumber] = s + } + + postChallenges, err := ffi.GeneratePoStFallbackSectorChallenges(ppt, minerID, randomness, sectorNums) + if err != nil { + return nil, nil, xerrors.Errorf("generating fallback challenges: %v", err) + } + + proofList := make([]ffi.PartitionProof, 
partitionCount) + var wg sync.WaitGroup + wg.Add(int(partitionCount)) + + for partIdx := uint64(0); partIdx < partitionCount; partIdx++ { + go func(partIdx uint64) { + defer wg.Done() + + sectors := make([]storiface.PostSectorChallenge, 0) + for i := uint64(0); i < maxPartitionSize; i++ { + si := i + partIdx*maxPartitionSize + if si >= uint64(len(postChallenges.Sectors)) { + break + } + + snum := postChallenges.Sectors[si] + sinfo := sectorMap[snum] + + sectors = append(sectors, storiface.PostSectorChallenge{ + SealProof: sinfo.SealProof, + SectorNumber: snum, + SealedCID: sinfo.SealedCID, + Challenge: postChallenges.Challenges[snum], + Update: sinfo.SectorKey != nil, + }) + } + + pr, err := t.prover.GenerateWindowPoStAdv(cctx, ppt, minerID, sectors, int(partIdx), randomness, true) + sk := pr.Skipped + + if err != nil || len(sk) > 0 { + log.Errorf("generateWindowPost part:%d, skipped:%d, sectors: %d, err: %+v", partIdx, len(sk), len(sectors), err) + flk.Lock() + skipped = append(skipped, sk...) + + if err != nil { + retErr = multierr.Append(retErr, xerrors.Errorf("partitionIndex:%d err:%+v", partIdx, err)) + } + flk.Unlock() + } + + proofList[partIdx] = ffi.PartitionProof(pr.PoStProofs) + }(partIdx) + } + + wg.Wait() + + if len(skipped) > 0 { + log.Warnw("generateWindowPoSt skipped sectors", "skipped", len(skipped)) + } + + postProofs, err := ffi.MergeWindowPoStPartitionProofs(ppt, proofList) + if err != nil { + return nil, skipped, xerrors.Errorf("merge windowPoSt partition proofs: %v", err) + } + + out = append(out, *postProofs) + return out, skipped, retErr +} diff --git a/provider/lpwindow/compute_task.go b/provider/lpwindow/compute_task.go new file mode 100644 index 000000000..e9d582704 --- /dev/null +++ b/provider/lpwindow/compute_task.go @@ -0,0 +1,428 @@ +package lpwindow + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "sort" + "strings" + + logging "github.com/ipfs/go-log/v2" + "github.com/samber/lo" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-bitfield" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/crypto" + "github.com/filecoin-project/go-state-types/dline" + "github.com/filecoin-project/go-state-types/network" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/chain/actors/builtin/miner" + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/lib/harmony/taskhelp" + "github.com/filecoin-project/lotus/lib/promise" + "github.com/filecoin-project/lotus/node/modules/dtypes" + "github.com/filecoin-project/lotus/provider/chainsched" + "github.com/filecoin-project/lotus/storage/sealer" + "github.com/filecoin-project/lotus/storage/sealer/sealtasks" + "github.com/filecoin-project/lotus/storage/sealer/storiface" + "github.com/filecoin-project/lotus/storage/wdpost" +) + +var log = logging.Logger("lpwindow") + +var EpochsPerDeadline = miner.WPoStProvingPeriod() / abi.ChainEpoch(miner.WPoStPeriodDeadlines) + +type WdPostTaskDetails struct { + Ts *types.TipSet + Deadline *dline.Info +} + +type WDPoStAPI interface { + ChainHead(context.Context) (*types.TipSet, error) + ChainGetTipSet(context.Context, types.TipSetKey) (*types.TipSet, error) + StateMinerProvingDeadline(context.Context, address.Address, types.TipSetKey) (*dline.Info, 
error) + StateMinerInfo(context.Context, address.Address, types.TipSetKey) (api.MinerInfo, error) + ChainGetTipSetAfterHeight(context.Context, abi.ChainEpoch, types.TipSetKey) (*types.TipSet, error) + StateMinerPartitions(context.Context, address.Address, uint64, types.TipSetKey) ([]api.Partition, error) + StateGetRandomnessFromBeacon(ctx context.Context, personalization crypto.DomainSeparationTag, randEpoch abi.ChainEpoch, entropy []byte, tsk types.TipSetKey) (abi.Randomness, error) + StateNetworkVersion(context.Context, types.TipSetKey) (network.Version, error) + StateMinerSectors(context.Context, address.Address, *bitfield.BitField, types.TipSetKey) ([]*miner.SectorOnChainInfo, error) +} + +type ProverPoSt interface { + GenerateWindowPoStAdv(ctx context.Context, ppt abi.RegisteredPoStProof, mid abi.ActorID, sectors []storiface.PostSectorChallenge, partitionIdx int, randomness abi.PoStRandomness, allowSkip bool) (storiface.WindowPoStResult, error) +} + +type WdPostTask struct { + api WDPoStAPI + db *harmonydb.DB + + faultTracker sealer.FaultTracker + prover ProverPoSt + verifier storiface.Verifier + + windowPoStTF promise.Promise[harmonytask.AddTaskFunc] + + actors []dtypes.MinerAddress + max int +} + +type wdTaskIdentity struct { + SpID uint64 `db:"sp_id"` + ProvingPeriodStart abi.ChainEpoch `db:"proving_period_start"` + DeadlineIndex uint64 `db:"deadline_index"` + PartitionIndex uint64 `db:"partition_index"` +} + +func NewWdPostTask(db *harmonydb.DB, + api WDPoStAPI, + faultTracker sealer.FaultTracker, + prover ProverPoSt, + verifier storiface.Verifier, + + pcs *chainsched.ProviderChainSched, + actors []dtypes.MinerAddress, + max int, +) (*WdPostTask, error) { + t := &WdPostTask{ + db: db, + api: api, + + faultTracker: faultTracker, + prover: prover, + verifier: verifier, + + actors: actors, + max: max, + } + + if err := pcs.AddHandler(t.processHeadChange); err != nil { + return nil, err + } + + return t, nil +} + +func (t *WdPostTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + log.Debugw("WdPostTask.Do()", "taskID", taskID) + + var spID, pps, dlIdx, partIdx uint64 + + err = t.db.QueryRow(context.Background(), + `Select sp_id, proving_period_start, deadline_index, partition_index + from wdpost_partition_tasks + where task_id = $1`, taskID).Scan( + &spID, &pps, &dlIdx, &partIdx, + ) + if err != nil { + log.Errorf("WdPostTask.Do() failed to queryRow: %v", err) + return false, err + } + + head, err := t.api.ChainHead(context.Background()) + if err != nil { + log.Errorf("WdPostTask.Do() failed to get chain head: %v", err) + return false, err + } + + deadline := wdpost.NewDeadlineInfo(abi.ChainEpoch(pps), dlIdx, head.Height()) + + if deadline.PeriodElapsed() { + log.Errorf("WdPost removed stale task: %v %v", taskID, deadline) + return true, nil + } + + maddr, err := address.NewIDAddress(spID) + if err != nil { + log.Errorf("WdPostTask.Do() failed to NewIDAddress: %v", err) + return false, err + } + + ts, err := t.api.ChainGetTipSetAfterHeight(context.Background(), deadline.Challenge, head.Key()) + if err != nil { + log.Errorf("WdPostTask.Do() failed to ChainGetTipSetAfterHeight: %v", err) + return false, err + } + + postOut, err := t.DoPartition(context.Background(), ts, maddr, deadline, partIdx) + if err != nil { + log.Errorf("WdPostTask.Do() failed to doPartition: %v", err) + return false, err + } + + var msgbuf bytes.Buffer + if err := postOut.MarshalCBOR(&msgbuf); err != nil { + return false, xerrors.Errorf("marshaling PoSt: %w", err) + } + + 
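+	// Test-mode short circuit: if this task_id has a row in harmony_test, the proof
+	// parameters are written back to that row as JSON and nothing is queued for
+	// on-chain submission. The result can then be inspected with, for example:
+	//
+	//	SELECT result FROM harmony_test WHERE task_id = <task_id>;
+	//
+	// (column name taken from the UPDATE below; the harmony_test schema itself is
+	// assumed to live with the other harmonydb migrations).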
testTaskIDCt := 0 + if err = t.db.QueryRow(context.Background(), `SELECT COUNT(*) FROM harmony_test WHERE task_id = $1`, taskID).Scan(&testTaskIDCt); err != nil { + return false, xerrors.Errorf("querying for test task: %w", err) + } + if testTaskIDCt == 1 { + // Do not send test tasks to the chain but to harmony_test & stdout. + + data, err := json.MarshalIndent(map[string]any{ + "sp_id": spID, + "proving_period_start": pps, + "deadline": deadline.Index, + "partition": partIdx, + "submit_at_epoch": deadline.Open, + "submit_by_epoch": deadline.Close, + "proof_params": msgbuf.Bytes(), + }, "", " ") + if err != nil { + return false, xerrors.Errorf("marshaling message: %w", err) + } + ctx := context.Background() + _, err = t.db.Exec(ctx, `UPDATE harmony_test SET result=$1 WHERE task_id=$2`, string(data), taskID) + if err != nil { + return false, xerrors.Errorf("updating harmony_test: %w", err) + } + log.Infof("SKIPPED sending test message to chain. SELECT * FROM harmony_test WHERE task_id= %v", taskID) + return true, nil // nothing committed + } + // Insert into wdpost_proofs table + n, err := t.db.Exec(context.Background(), + `INSERT INTO wdpost_proofs ( + sp_id, + proving_period_start, + deadline, + partition, + submit_at_epoch, + submit_by_epoch, + proof_params) + VALUES ($1, $2, $3, $4, $5, $6, $7)`, + spID, + pps, + deadline.Index, + partIdx, + deadline.Open, + deadline.Close, + msgbuf.Bytes(), + ) + + if err != nil { + log.Errorf("WdPostTask.Do() failed to insert into wdpost_proofs: %v", err) + return false, err + } + if n != 1 { + log.Errorf("WdPostTask.Do() failed to insert into wdpost_proofs: %v", err) + return false, err + } + + return true, nil +} + +func entToStr[T any](t T, i int) string { + return fmt.Sprint(t) +} + +func (t *WdPostTask) CanAccept(ids []harmonytask.TaskID, te *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + // GetEpoch + ts, err := t.api.ChainHead(context.Background()) + + if err != nil { + return nil, err + } + + // GetData for tasks + type wdTaskDef struct { + TaskID harmonytask.TaskID + SpID uint64 + ProvingPeriodStart abi.ChainEpoch + DeadlineIndex uint64 + PartitionIndex uint64 + + dlInfo *dline.Info `pgx:"-"` + openTs *types.TipSet + } + var tasks []wdTaskDef + + err = t.db.Select(context.Background(), &tasks, + `Select + task_id, + sp_id, + proving_period_start, + deadline_index, + partition_index + from wdpost_partition_tasks + where task_id IN (SELECT unnest(string_to_array($1, ','))::bigint)`, strings.Join(lo.Map(ids, entToStr[harmonytask.TaskID]), ",")) + if err != nil { + return nil, err + } + + // Accept those past deadline, then delete them in Do(). 
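+	// For each candidate, compute its deadline info at the current head. Anything whose
+	// proving period has already elapsed is accepted immediately so that Do() can drop
+	// it as stale; for the rest, the tipset at the deadline's open epoch is resolved so
+	// the remaining candidates can be sorted by deadline below.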
+ for i := range tasks { + tasks[i].dlInfo = wdpost.NewDeadlineInfo(tasks[i].ProvingPeriodStart, tasks[i].DeadlineIndex, ts.Height()) + + if tasks[i].dlInfo.PeriodElapsed() { + return &tasks[i].TaskID, nil + } + + tasks[i].openTs, err = t.api.ChainGetTipSetAfterHeight(context.Background(), tasks[i].dlInfo.Open, ts.Key()) + if err != nil { + return nil, xerrors.Errorf("getting task open tipset: %w", err) + } + } + + // todo fix the block below + // workAdderMutex is held by taskTypeHandler.considerWork, which calls this CanAccept + // te.ResourcesAvailable will try to get that lock again, which will deadlock + + // Discard those too big for our free RAM + /*freeRAM := te.ResourcesAvailable().Ram + tasks = lo.Filter(tasks, func(d wdTaskDef, _ int) bool { + maddr, err := address.NewIDAddress(tasks[0].Sp_id) + if err != nil { + log.Errorf("WdPostTask.CanAccept() failed to NewIDAddress: %v", err) + return false + } + + mi, err := t.api.StateMinerInfo(context.Background(), maddr, ts.Key()) + if err != nil { + log.Errorf("WdPostTask.CanAccept() failed to StateMinerInfo: %v", err) + return false + } + + spt, err := policy.GetSealProofFromPoStProof(mi.WindowPoStProofType) + if err != nil { + log.Errorf("WdPostTask.CanAccept() failed to GetSealProofFromPoStProof: %v", err) + return false + } + + return res[spt].MaxMemory <= freeRAM + })*/ + if len(tasks) == 0 { + log.Infof("RAM too small for any WDPost task") + return nil, nil + } + + // Ignore those with too many failures unless they are the only ones left. + tasks, _ = taskhelp.SliceIfFound(tasks, func(d wdTaskDef) bool { + var r int + err := t.db.QueryRow(context.Background(), `SELECT COUNT(*) + FROM harmony_task_history + WHERE task_id = $1 AND result = false`, d.TaskID).Scan(&r) + if err != nil { + log.Errorf("WdPostTask.CanAccept() failed to queryRow: %v", err) + } + return r < 2 + }) + + // Select the one closest to the deadline + sort.Slice(tasks, func(i, j int) bool { + return tasks[i].dlInfo.Open < tasks[j].dlInfo.Open + }) + + return &tasks[0].TaskID, nil +} + +var res = storiface.ResourceTable[sealtasks.TTGenerateWindowPoSt] + +func (t *WdPostTask) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Name: "WdPost", + Max: t.max, + MaxFailures: 3, + Follows: nil, + Cost: resources.Resources{ + Cpu: 1, + + // todo set to something for 32/64G sector sizes? Technically windowPoSt is happy on a CPU + // but it will use a GPU if available + Gpu: 0, + + // RAM of smallest proof's max is listed here + Ram: lo.Reduce(lo.Keys(res), func(i uint64, k abi.RegisteredSealProof, _ int) uint64 { + if res[k].MaxMemory < i { + return res[k].MaxMemory + } + return i + }, 1<<63), + }, + } +} + +func (t *WdPostTask) Adder(taskFunc harmonytask.AddTaskFunc) { + t.windowPoStTF.Set(taskFunc) +} + +func (t *WdPostTask) processHeadChange(ctx context.Context, revert, apply *types.TipSet) error { + for _, act := range t.actors { + maddr := address.Address(act) + + aid, err := address.IDFromAddress(maddr) + if err != nil { + return xerrors.Errorf("getting miner ID: %w", err) + } + + di, err := t.api.StateMinerProvingDeadline(ctx, maddr, apply.Key()) + if err != nil { + return err + } + + if !di.PeriodStarted() { + return nil // not proving anything yet + } + + partitions, err := t.api.StateMinerPartitions(ctx, maddr, di.Index, apply.Key()) + if err != nil { + return xerrors.Errorf("getting partitions: %w", err) + } + + // TODO: Batch Partitions?? 
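+	// One task per (sp_id, proving_period_start, deadline_index, partition_index). The
+	// AddTaskFunc callback writes the wdpost_partition_tasks row inside the same
+	// transaction that creates the harmony task, so the task and its identity row are
+	// committed (or rolled back) together.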
+ + for pidx := range partitions { + tid := wdTaskIdentity{ + SpID: aid, + ProvingPeriodStart: di.PeriodStart, + DeadlineIndex: di.Index, + PartitionIndex: uint64(pidx), + } + + tf := t.windowPoStTF.Val(ctx) + if tf == nil { + return xerrors.Errorf("no task func") + } + + tf(func(id harmonytask.TaskID, tx *harmonydb.Tx) (bool, error) { + return t.addTaskToDB(id, tid, tx) + }) + } + } + + return nil +} + +func (t *WdPostTask) addTaskToDB(taskId harmonytask.TaskID, taskIdent wdTaskIdentity, tx *harmonydb.Tx) (bool, error) { + + _, err := tx.Exec( + `INSERT INTO wdpost_partition_tasks ( + task_id, + sp_id, + proving_period_start, + deadline_index, + partition_index + ) VALUES ($1, $2, $3, $4, $5)`, + taskId, + taskIdent.SpID, + taskIdent.ProvingPeriodStart, + taskIdent.DeadlineIndex, + taskIdent.PartitionIndex, + ) + if err != nil { + return false, xerrors.Errorf("insert partition task: %w", err) + } + + return true, nil +} + +var _ harmonytask.TaskInterface = &WdPostTask{} diff --git a/provider/lpwindow/faults_simple.go b/provider/lpwindow/faults_simple.go new file mode 100644 index 000000000..d43e8ee19 --- /dev/null +++ b/provider/lpwindow/faults_simple.go @@ -0,0 +1,152 @@ +package lpwindow + +import ( + "context" + "crypto/rand" + "fmt" + "sync" + "time" + + "golang.org/x/xerrors" + + ffi "github.com/filecoin-project/filecoin-ffi" + "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/storage/paths" + "github.com/filecoin-project/lotus/storage/sealer/storiface" +) + +type SimpleFaultTracker struct { + storage paths.Store + index paths.SectorIndex + + parallelCheckLimit int // todo live config? + singleCheckTimeout time.Duration + partitionCheckTimeout time.Duration +} + +func NewSimpleFaultTracker(storage paths.Store, index paths.SectorIndex, + parallelCheckLimit int, singleCheckTimeout time.Duration, partitionCheckTimeout time.Duration) *SimpleFaultTracker { + return &SimpleFaultTracker{ + storage: storage, + index: index, + + parallelCheckLimit: parallelCheckLimit, + singleCheckTimeout: singleCheckTimeout, + partitionCheckTimeout: partitionCheckTimeout, + } +} + +func (m *SimpleFaultTracker) CheckProvable(ctx context.Context, pp abi.RegisteredPoStProof, sectors []storiface.SectorRef, rg storiface.RGetter) (map[abi.SectorID]string, error) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + if rg == nil { + return nil, xerrors.Errorf("rg is nil") + } + + var bad = make(map[abi.SectorID]string) + var badLk sync.Mutex + + var postRand abi.PoStRandomness = make([]byte, abi.RandomnessLength) + _, _ = rand.Read(postRand) + postRand[31] &= 0x3f + + limit := m.parallelCheckLimit + if limit <= 0 { + limit = len(sectors) + } + throttle := make(chan struct{}, limit) + + addBad := func(s abi.SectorID, reason string) { + badLk.Lock() + bad[s] = reason + badLk.Unlock() + } + + if m.partitionCheckTimeout > 0 { + var cancel2 context.CancelFunc + ctx, cancel2 = context.WithTimeout(ctx, m.partitionCheckTimeout) + defer cancel2() + } + + var wg sync.WaitGroup + wg.Add(len(sectors)) + + for _, sector := range sectors { + select { + case throttle <- struct{}{}: + case <-ctx.Done(): + addBad(sector.ID, fmt.Sprintf("waiting for check worker: %s", ctx.Err())) + wg.Done() + continue + } + + go func(sector storiface.SectorRef) { + defer wg.Done() + defer func() { + <-throttle + }() + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + commr, update, err := rg(ctx, sector.ID) + if err != nil { + log.Warnw("CheckProvable Sector FAULT: getting commR", "sector", 
sector, "sealed", "err", err) + addBad(sector.ID, fmt.Sprintf("getting commR: %s", err)) + return + } + + toLock := storiface.FTSealed | storiface.FTCache + if update { + toLock = storiface.FTUpdate | storiface.FTUpdateCache + } + + locked, err := m.index.StorageTryLock(ctx, sector.ID, toLock, storiface.FTNone) + if err != nil { + addBad(sector.ID, fmt.Sprintf("tryLock error: %s", err)) + return + } + + if !locked { + log.Warnw("CheckProvable Sector FAULT: can't acquire read lock", "sector", sector) + addBad(sector.ID, fmt.Sprint("can't acquire read lock")) + return + } + + ch, err := ffi.GeneratePoStFallbackSectorChallenges(pp, sector.ID.Miner, postRand, []abi.SectorNumber{ + sector.ID.Number, + }) + if err != nil { + log.Warnw("CheckProvable Sector FAULT: generating challenges", "sector", sector, "err", err) + addBad(sector.ID, fmt.Sprintf("generating fallback challenges: %s", err)) + return + } + + vctx := ctx + + if m.singleCheckTimeout > 0 { + var cancel2 context.CancelFunc + vctx, cancel2 = context.WithTimeout(ctx, m.singleCheckTimeout) + defer cancel2() + } + + _, err = m.storage.GenerateSingleVanillaProof(vctx, sector.ID.Miner, storiface.PostSectorChallenge{ + SealProof: sector.ProofType, + SectorNumber: sector.ID.Number, + SealedCID: commr, + Challenge: ch.Challenges[sector.ID.Number], + Update: update, + }, pp) + if err != nil { + log.Warnw("CheckProvable Sector FAULT: generating vanilla proof", "sector", sector, "err", err) + addBad(sector.ID, fmt.Sprintf("generating vanilla proof: %s", err)) + return + } + }(sector) + } + + wg.Wait() + + return bad, nil +} diff --git a/provider/lpwindow/recover_task.go b/provider/lpwindow/recover_task.go new file mode 100644 index 000000000..6006f3c35 --- /dev/null +++ b/provider/lpwindow/recover_task.go @@ -0,0 +1,322 @@ +package lpwindow + +import ( + "context" + + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-bitfield" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/builtin" + "github.com/filecoin-project/go-state-types/dline" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/chain/actors" + "github.com/filecoin-project/lotus/chain/actors/builtin/miner" + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/lib/promise" + "github.com/filecoin-project/lotus/node/modules/dtypes" + "github.com/filecoin-project/lotus/provider/chainsched" + "github.com/filecoin-project/lotus/provider/lpmessage" + "github.com/filecoin-project/lotus/storage/ctladdr" + "github.com/filecoin-project/lotus/storage/sealer" + "github.com/filecoin-project/lotus/storage/wdpost" +) + +type WdPostRecoverDeclareTask struct { + sender *lpmessage.Sender + db *harmonydb.DB + api WdPostRecoverDeclareTaskApi + faultTracker sealer.FaultTracker + + maxDeclareRecoveriesGasFee types.FIL + as *ctladdr.AddressSelector + actors []dtypes.MinerAddress + + startCheckTF promise.Promise[harmonytask.AddTaskFunc] +} + +type WdPostRecoverDeclareTaskApi interface { + ChainHead(context.Context) (*types.TipSet, error) + StateMinerProvingDeadline(context.Context, address.Address, types.TipSetKey) (*dline.Info, error) + StateMinerPartitions(context.Context, address.Address, uint64, types.TipSetKey) ([]api.Partition, error) + StateMinerInfo(context.Context, 
address.Address, types.TipSetKey) (api.MinerInfo, error) + StateMinerSectors(ctx context.Context, addr address.Address, bf *bitfield.BitField, tsk types.TipSetKey) ([]*miner.SectorOnChainInfo, error) + + GasEstimateMessageGas(context.Context, *types.Message, *api.MessageSendSpec, types.TipSetKey) (*types.Message, error) + GasEstimateFeeCap(context.Context, *types.Message, int64, types.TipSetKey) (types.BigInt, error) + GasEstimateGasPremium(_ context.Context, nblocksincl uint64, sender address.Address, gaslimit int64, tsk types.TipSetKey) (types.BigInt, error) + + WalletBalance(context.Context, address.Address) (types.BigInt, error) + WalletHas(context.Context, address.Address) (bool, error) + StateAccountKey(context.Context, address.Address, types.TipSetKey) (address.Address, error) + StateLookupID(context.Context, address.Address, types.TipSetKey) (address.Address, error) +} + +func NewWdPostRecoverDeclareTask(sender *lpmessage.Sender, + db *harmonydb.DB, + api WdPostRecoverDeclareTaskApi, + faultTracker sealer.FaultTracker, + as *ctladdr.AddressSelector, + pcs *chainsched.ProviderChainSched, + + maxDeclareRecoveriesGasFee types.FIL, + actors []dtypes.MinerAddress) (*WdPostRecoverDeclareTask, error) { + t := &WdPostRecoverDeclareTask{ + sender: sender, + db: db, + api: api, + faultTracker: faultTracker, + + maxDeclareRecoveriesGasFee: maxDeclareRecoveriesGasFee, + as: as, + actors: actors, + } + + if err := pcs.AddHandler(t.processHeadChange); err != nil { + return nil, err + } + + return t, nil +} + +func (w *WdPostRecoverDeclareTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + log.Debugw("WdPostRecoverDeclareTask.Do()", "taskID", taskID) + ctx := context.Background() + + var spID, pps, dlIdx, partIdx uint64 + + err = w.db.QueryRow(context.Background(), + `Select sp_id, proving_period_start, deadline_index, partition_index + from wdpost_recovery_tasks + where task_id = $1`, taskID).Scan( + &spID, &pps, &dlIdx, &partIdx, + ) + if err != nil { + log.Errorf("WdPostRecoverDeclareTask.Do() failed to queryRow: %v", err) + return false, err + } + + head, err := w.api.ChainHead(context.Background()) + if err != nil { + log.Errorf("WdPostRecoverDeclareTask.Do() failed to get chain head: %v", err) + return false, err + } + + deadline := wdpost.NewDeadlineInfo(abi.ChainEpoch(pps), dlIdx, head.Height()) + + if deadline.FaultCutoffPassed() { + log.Errorf("WdPostRecover removed stale task: %v %v", taskID, deadline) + return true, nil + } + + maddr, err := address.NewIDAddress(spID) + if err != nil { + log.Errorf("WdPostTask.Do() failed to NewIDAddress: %v", err) + return false, err + } + + partitions, err := w.api.StateMinerPartitions(context.Background(), maddr, dlIdx, head.Key()) + if err != nil { + log.Errorf("WdPostRecoverDeclareTask.Do() failed to get partitions: %v", err) + return false, err + } + + if partIdx >= uint64(len(partitions)) { + log.Errorf("WdPostRecoverDeclareTask.Do() failed to get partitions: partIdx >= len(partitions)") + return false, err + } + + partition := partitions[partIdx] + + unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors) + if err != nil { + return false, xerrors.Errorf("subtracting recovered set from fault set: %w", err) + } + + uc, err := unrecovered.Count() + if err != nil { + return false, xerrors.Errorf("counting unrecovered sectors: %w", err) + } + + if uc == 0 { + log.Warnw("nothing to declare recovered", "maddr", maddr, "deadline", deadline, "partition", partIdx) + return 
true, nil + } + + recovered, err := checkSectors(ctx, w.api, w.faultTracker, maddr, unrecovered, head.Key()) + if err != nil { + return false, xerrors.Errorf("checking unrecovered sectors: %w", err) + } + + // if all sectors failed to recover, don't declare recoveries + recoveredCount, err := recovered.Count() + if err != nil { + return false, xerrors.Errorf("counting recovered sectors: %w", err) + } + + if recoveredCount == 0 { + log.Warnw("no sectors recovered", "maddr", maddr, "deadline", deadline, "partition", partIdx) + return true, nil + } + + recDecl := miner.RecoveryDeclaration{ + Deadline: dlIdx, + Partition: partIdx, + Sectors: recovered, + } + + params := &miner.DeclareFaultsRecoveredParams{ + Recoveries: []miner.RecoveryDeclaration{recDecl}, + } + + enc, aerr := actors.SerializeParams(params) + if aerr != nil { + return false, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr) + } + + msg := &types.Message{ + To: maddr, + Method: builtin.MethodsMiner.DeclareFaultsRecovered, + Params: enc, + Value: types.NewInt(0), + } + + msg, mss, err := preparePoStMessage(w.api, w.as, maddr, msg, abi.TokenAmount(w.maxDeclareRecoveriesGasFee)) + if err != nil { + return false, xerrors.Errorf("sending declare recoveries message: %w", err) + } + + mc, err := w.sender.Send(ctx, msg, mss, "declare-recoveries") + if err != nil { + return false, xerrors.Errorf("sending declare recoveries message: %w", err) + } + + log.Debugw("WdPostRecoverDeclareTask.Do() sent declare recoveries message", "maddr", maddr, "deadline", deadline, "partition", partIdx, "mc", mc) + return true, nil +} + +func (w *WdPostRecoverDeclareTask) CanAccept(ids []harmonytask.TaskID, engine *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + if len(ids) == 0 { + // probably can't happen, but panicking is bad + return nil, nil + } + + if w.sender == nil { + // we can't send messages + return nil, nil + } + + return &ids[0], nil +} + +func (w *WdPostRecoverDeclareTask) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Max: 128, + Name: "WdPostRecoverDeclare", + Cost: resources.Resources{ + Cpu: 1, + Gpu: 0, + Ram: 128 << 20, + }, + MaxFailures: 10, + Follows: nil, + } +} + +func (w *WdPostRecoverDeclareTask) Adder(taskFunc harmonytask.AddTaskFunc) { + w.startCheckTF.Set(taskFunc) +} + +func (w *WdPostRecoverDeclareTask) processHeadChange(ctx context.Context, revert, apply *types.TipSet) error { + tf := w.startCheckTF.Val(ctx) + + for _, act := range w.actors { + maddr := address.Address(act) + + aid, err := address.IDFromAddress(maddr) + if err != nil { + return xerrors.Errorf("getting miner ID: %w", err) + } + + di, err := w.api.StateMinerProvingDeadline(ctx, maddr, apply.Key()) + if err != nil { + return err + } + + if !di.PeriodStarted() { + return nil // not proving anything yet + } + + // declaring two deadlines ahead + declDeadline := (di.Index + 2) % di.WPoStPeriodDeadlines + + pps := di.PeriodStart + if declDeadline != di.Index+2 { + pps = di.NextPeriodStart() + } + + partitions, err := w.api.StateMinerPartitions(ctx, maddr, declDeadline, apply.Key()) + if err != nil { + return xerrors.Errorf("getting partitions: %w", err) + } + + for pidx, partition := range partitions { + unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors) + if err != nil { + return xerrors.Errorf("subtracting recovered set from fault set: %w", err) + } + + uc, err := unrecovered.Count() + if err != nil { + return xerrors.Errorf("counting 
unrecovered sectors: %w", err) + } + + if uc == 0 { + log.Debugw("WdPostRecoverDeclareTask.processHeadChange() uc == 0, skipping", "maddr", maddr, "declDeadline", declDeadline, "pidx", pidx) + continue + } + + tid := wdTaskIdentity{ + SpID: aid, + ProvingPeriodStart: pps, + DeadlineIndex: declDeadline, + PartitionIndex: uint64(pidx), + } + + tf(func(id harmonytask.TaskID, tx *harmonydb.Tx) (bool, error) { + return w.addTaskToDB(id, tid, tx) + }) + } + } + + return nil +} + +func (w *WdPostRecoverDeclareTask) addTaskToDB(taskId harmonytask.TaskID, taskIdent wdTaskIdentity, tx *harmonydb.Tx) (bool, error) { + _, err := tx.Exec( + `INSERT INTO wdpost_recovery_tasks ( + task_id, + sp_id, + proving_period_start, + deadline_index, + partition_index + ) VALUES ($1, $2, $3, $4, $5)`, + taskId, + taskIdent.SpID, + taskIdent.ProvingPeriodStart, + taskIdent.DeadlineIndex, + taskIdent.PartitionIndex, + ) + if err != nil { + return false, xerrors.Errorf("insert partition task: %w", err) + } + + return true, nil +} + +var _ harmonytask.TaskInterface = &WdPostRecoverDeclareTask{} diff --git a/provider/lpwindow/submit_task.go b/provider/lpwindow/submit_task.go new file mode 100644 index 000000000..72f2499f6 --- /dev/null +++ b/provider/lpwindow/submit_task.go @@ -0,0 +1,305 @@ +package lpwindow + +import ( + "bytes" + "context" + + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/big" + "github.com/filecoin-project/go-state-types/builtin" + "github.com/filecoin-project/go-state-types/builtin/v9/miner" + "github.com/filecoin-project/go-state-types/crypto" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/lib/promise" + "github.com/filecoin-project/lotus/provider/chainsched" + "github.com/filecoin-project/lotus/provider/lpmessage" + "github.com/filecoin-project/lotus/storage/ctladdr" + "github.com/filecoin-project/lotus/storage/wdpost" +) + +type WdPoStSubmitTaskApi interface { + ChainHead(context.Context) (*types.TipSet, error) + + WalletBalance(context.Context, address.Address) (types.BigInt, error) + WalletHas(context.Context, address.Address) (bool, error) + + StateAccountKey(context.Context, address.Address, types.TipSetKey) (address.Address, error) + StateLookupID(context.Context, address.Address, types.TipSetKey) (address.Address, error) + StateMinerInfo(context.Context, address.Address, types.TipSetKey) (api.MinerInfo, error) + StateGetRandomnessFromTickets(ctx context.Context, personalization crypto.DomainSeparationTag, randEpoch abi.ChainEpoch, entropy []byte, tsk types.TipSetKey) (abi.Randomness, error) + + GasEstimateMessageGas(context.Context, *types.Message, *api.MessageSendSpec, types.TipSetKey) (*types.Message, error) + GasEstimateFeeCap(context.Context, *types.Message, int64, types.TipSetKey) (types.BigInt, error) + GasEstimateGasPremium(_ context.Context, nblocksincl uint64, sender address.Address, gaslimit int64, tsk types.TipSetKey) (types.BigInt, error) +} + +type WdPostSubmitTask struct { + sender *lpmessage.Sender + db *harmonydb.DB + api WdPoStSubmitTaskApi + + maxWindowPoStGasFee types.FIL + as *ctladdr.AddressSelector + + submitPoStTF promise.Promise[harmonytask.AddTaskFunc] +} + +func NewWdPostSubmitTask(pcs 
*chainsched.ProviderChainSched, send *lpmessage.Sender, db *harmonydb.DB, api WdPoStSubmitTaskApi, maxWindowPoStGasFee types.FIL, as *ctladdr.AddressSelector) (*WdPostSubmitTask, error) { + res := &WdPostSubmitTask{ + sender: send, + db: db, + api: api, + + maxWindowPoStGasFee: maxWindowPoStGasFee, + as: as, + } + + if err := pcs.AddHandler(res.processHeadChange); err != nil { + return nil, err + } + + return res, nil +} + +func (w *WdPostSubmitTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + log.Debugw("WdPostSubmitTask.Do", "taskID", taskID) + + var spID uint64 + var deadline uint64 + var partition uint64 + var pps, submitAtEpoch, submitByEpoch abi.ChainEpoch + var earlyParamBytes []byte + var dbTask uint64 + + err = w.db.QueryRow( + context.Background(), `SELECT sp_id, proving_period_start, deadline, partition, submit_at_epoch, submit_by_epoch, proof_params, submit_task_id + FROM wdpost_proofs WHERE submit_task_id = $1`, taskID, + ).Scan(&spID, &pps, &deadline, &partition, &submitAtEpoch, &submitByEpoch, &earlyParamBytes, &dbTask) + if err != nil { + return false, xerrors.Errorf("query post proof: %w", err) + } + + if dbTask != uint64(taskID) { + return false, xerrors.Errorf("taskID mismatch: %d != %d", dbTask, taskID) + } + + head, err := w.api.ChainHead(context.Background()) + if err != nil { + return false, xerrors.Errorf("getting chain head: %w", err) + } + + if head.Height() > submitByEpoch { + // we missed the deadline, no point in submitting + log.Errorw("missed submit deadline", "spID", spID, "deadline", deadline, "partition", partition, "submitByEpoch", submitByEpoch, "headHeight", head.Height()) + return true, nil + } + + if head.Height() < submitAtEpoch { + log.Errorw("submit epoch not reached", "spID", spID, "deadline", deadline, "partition", partition, "submitAtEpoch", submitAtEpoch, "headHeight", head.Height()) + return false, xerrors.Errorf("submit epoch not reached: %d < %d", head.Height(), submitAtEpoch) + } + + dlInfo := wdpost.NewDeadlineInfo(pps, deadline, head.Height()) + + var params miner.SubmitWindowedPoStParams + if err := params.UnmarshalCBOR(bytes.NewReader(earlyParamBytes)); err != nil { + return false, xerrors.Errorf("unmarshaling proof message: %w", err) + } + + commEpoch := dlInfo.Challenge + + commRand, err := w.api.StateGetRandomnessFromTickets(context.Background(), crypto.DomainSeparationTag_PoStChainCommit, commEpoch, nil, head.Key()) + if err != nil { + err = xerrors.Errorf("failed to get chain randomness from tickets for windowPost (epoch=%d): %w", commEpoch, err) + log.Errorf("submitPoStMessage failed: %+v", err) + + return false, xerrors.Errorf("getting post commit randomness: %w", err) + } + + params.ChainCommitEpoch = commEpoch + params.ChainCommitRand = commRand + + var pbuf bytes.Buffer + if err := params.MarshalCBOR(&pbuf); err != nil { + return false, xerrors.Errorf("marshaling proof message: %w", err) + } + + maddr, err := address.NewIDAddress(spID) + if err != nil { + return false, xerrors.Errorf("invalid miner address: %w", err) + } + + msg := &types.Message{ + To: maddr, + Method: builtin.MethodsMiner.SubmitWindowedPoSt, + Params: pbuf.Bytes(), + Value: big.Zero(), + } + + msg, mss, err := preparePoStMessage(w.api, w.as, maddr, msg, abi.TokenAmount(w.maxWindowPoStGasFee)) + if err != nil { + return false, xerrors.Errorf("preparing proof message: %w", err) + } + + ctx := context.Background() + smsg, err := w.sender.Send(ctx, msg, mss, "wdpost") + if err != nil { + return false, xerrors.Errorf("sending 
proof message: %w", err) + } + + // set message_cid in the wdpost_proofs entry + + _, err = w.db.Exec(ctx, `UPDATE wdpost_proofs SET message_cid = $1 WHERE sp_id = $2 AND proving_period_start = $3 AND deadline = $4 AND partition = $5`, smsg.String(), spID, pps, deadline, partition) + if err != nil { + return true, xerrors.Errorf("updating wdpost_proofs: %w", err) + } + + return true, nil +} + +func (w *WdPostSubmitTask) CanAccept(ids []harmonytask.TaskID, engine *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + if len(ids) == 0 { + // probably can't happen, but panicking is bad + return nil, nil + } + + if w.sender == nil { + // we can't send messages + return nil, nil + } + + return &ids[0], nil +} + +func (w *WdPostSubmitTask) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Max: 128, + Name: "WdPostSubmit", + Cost: resources.Resources{ + Cpu: 0, + Gpu: 0, + Ram: 10 << 20, + }, + MaxFailures: 10, + Follows: nil, // ?? + } +} + +func (w *WdPostSubmitTask) Adder(taskFunc harmonytask.AddTaskFunc) { + w.submitPoStTF.Set(taskFunc) +} + +func (w *WdPostSubmitTask) processHeadChange(ctx context.Context, revert, apply *types.TipSet) error { + tf := w.submitPoStTF.Val(ctx) + + qry, err := w.db.Query(ctx, `SELECT sp_id, proving_period_start, deadline, partition, submit_at_epoch FROM wdpost_proofs WHERE submit_task_id IS NULL AND submit_at_epoch <= $1`, apply.Height()) + if err != nil { + return err + } + defer qry.Close() + + for qry.Next() { + var spID int64 + var pps int64 + var deadline uint64 + var partition uint64 + var submitAtEpoch uint64 + if err := qry.Scan(&spID, &pps, &deadline, &partition, &submitAtEpoch); err != nil { + return xerrors.Errorf("scan submittable posts: %w", err) + } + + tf(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, err error) { + // update in transaction iff submit_task_id is still null + res, err := tx.Exec(`UPDATE wdpost_proofs SET submit_task_id = $1 WHERE sp_id = $2 AND proving_period_start = $3 AND deadline = $4 AND partition = $5 AND submit_task_id IS NULL`, id, spID, pps, deadline, partition) + if err != nil { + return false, xerrors.Errorf("query ready proof: %w", err) + } + if res != 1 { + return false, nil + } + + return true, nil + }) + } + if err := qry.Err(); err != nil { + return err + } + + return nil +} + +type MsgPrepAPI interface { + StateMinerInfo(context.Context, address.Address, types.TipSetKey) (api.MinerInfo, error) + GasEstimateMessageGas(context.Context, *types.Message, *api.MessageSendSpec, types.TipSetKey) (*types.Message, error) + GasEstimateFeeCap(context.Context, *types.Message, int64, types.TipSetKey) (types.BigInt, error) + GasEstimateGasPremium(ctx context.Context, nblocksincl uint64, sender address.Address, gaslimit int64, tsk types.TipSetKey) (types.BigInt, error) + + WalletBalance(context.Context, address.Address) (types.BigInt, error) + WalletHas(context.Context, address.Address) (bool, error) + StateAccountKey(context.Context, address.Address, types.TipSetKey) (address.Address, error) + StateLookupID(context.Context, address.Address, types.TipSetKey) (address.Address, error) +} + +func preparePoStMessage(w MsgPrepAPI, as *ctladdr.AddressSelector, maddr address.Address, msg *types.Message, maxFee abi.TokenAmount) (*types.Message, *api.MessageSendSpec, error) { + mi, err := w.StateMinerInfo(context.Background(), maddr, types.EmptyTSK) + if err != nil { + return nil, nil, xerrors.Errorf("error getting miner info: %w", err) + } + + // set the worker as a fallback + 
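+	// mi.Worker is only the From used for gas estimation below; the final sender is
+	// picked by the AddressSelector (as.AddressFor, weighing goodFunds against
+	// minFunds) and overwrites msg.From before the message is returned to the caller.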
msg.From = mi.Worker + + mss := &api.MessageSendSpec{ + MaxFee: maxFee, + } + + // (optimal) initial estimation with some overestimation that guarantees + // block inclusion within the next 20 tipsets. + gm, err := w.GasEstimateMessageGas(context.Background(), msg, mss, types.EmptyTSK) + if err != nil { + log.Errorw("estimating gas", "error", err) + return nil, nil, xerrors.Errorf("estimating gas: %w", err) + } + *msg = *gm + + // calculate a more frugal estimation; premium is estimated to guarantee + // inclusion within 5 tipsets, and fee cap is estimated for inclusion + // within 4 tipsets. + minGasFeeMsg := *msg + + minGasFeeMsg.GasPremium, err = w.GasEstimateGasPremium(context.Background(), 5, msg.From, msg.GasLimit, types.EmptyTSK) + if err != nil { + log.Errorf("failed to estimate minimum gas premium: %+v", err) + minGasFeeMsg.GasPremium = msg.GasPremium + } + + minGasFeeMsg.GasFeeCap, err = w.GasEstimateFeeCap(context.Background(), &minGasFeeMsg, 4, types.EmptyTSK) + if err != nil { + log.Errorf("failed to estimate minimum gas fee cap: %+v", err) + minGasFeeMsg.GasFeeCap = msg.GasFeeCap + } + + // goodFunds = funds needed for optimal inclusion probability. + // minFunds = funds needed for more speculative inclusion probability. + goodFunds := big.Add(msg.RequiredFunds(), msg.Value) + minFunds := big.Min(big.Add(minGasFeeMsg.RequiredFunds(), minGasFeeMsg.Value), goodFunds) + + from, _, err := as.AddressFor(context.Background(), w, mi, api.PoStAddr, goodFunds, minFunds) + if err != nil { + return nil, nil, xerrors.Errorf("error getting address: %w", err) + } + + msg.From = from + + return msg, mss, nil +} + +var _ harmonytask.TaskInterface = &WdPostSubmitTask{} diff --git a/provider/lpwinning/winning_task.go b/provider/lpwinning/winning_task.go new file mode 100644 index 000000000..f02ffa1ae --- /dev/null +++ b/provider/lpwinning/winning_task.go @@ -0,0 +1,672 @@ +package lpwinning + +import ( + "bytes" + "context" + "crypto/rand" + "database/sql" + "encoding/binary" + "encoding/json" + "errors" + "time" + + "github.com/ipfs/go-cid" + logging "github.com/ipfs/go-log/v2" + "golang.org/x/xerrors" + + ffi "github.com/filecoin-project/filecoin-ffi" + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/crypto" + "github.com/filecoin-project/go-state-types/network" + prooftypes "github.com/filecoin-project/go-state-types/proof" + + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/build" + "github.com/filecoin-project/lotus/chain/gen" + lrand "github.com/filecoin-project/lotus/chain/rand" + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/lib/harmony/harmonytask" + "github.com/filecoin-project/lotus/lib/harmony/resources" + "github.com/filecoin-project/lotus/lib/promise" + "github.com/filecoin-project/lotus/node/modules/dtypes" + "github.com/filecoin-project/lotus/storage/sealer/storiface" +) + +var log = logging.Logger("lpwinning") + +type WinPostTask struct { + max int + db *harmonydb.DB + + prover ProverWinningPoSt + verifier storiface.Verifier + + api WinPostAPI + actors []dtypes.MinerAddress + + mineTF promise.Promise[harmonytask.AddTaskFunc] +} + +type WinPostAPI interface { + ChainHead(context.Context) (*types.TipSet, error) + ChainTipSetWeight(context.Context, types.TipSetKey) (types.BigInt, error) + ChainGetTipSet(context.Context, types.TipSetKey) 
(*types.TipSet, error) + + StateGetBeaconEntry(context.Context, abi.ChainEpoch) (*types.BeaconEntry, error) + SyncSubmitBlock(context.Context, *types.BlockMsg) error + StateGetRandomnessFromBeacon(ctx context.Context, personalization crypto.DomainSeparationTag, randEpoch abi.ChainEpoch, entropy []byte, tsk types.TipSetKey) (abi.Randomness, error) + StateGetRandomnessFromTickets(ctx context.Context, personalization crypto.DomainSeparationTag, randEpoch abi.ChainEpoch, entropy []byte, tsk types.TipSetKey) (abi.Randomness, error) + StateNetworkVersion(context.Context, types.TipSetKey) (network.Version, error) + StateMinerInfo(context.Context, address.Address, types.TipSetKey) (api.MinerInfo, error) + + MinerGetBaseInfo(context.Context, address.Address, abi.ChainEpoch, types.TipSetKey) (*api.MiningBaseInfo, error) + MinerCreateBlock(context.Context, *api.BlockTemplate) (*types.BlockMsg, error) + MpoolSelect(context.Context, types.TipSetKey, float64) ([]*types.SignedMessage, error) + + WalletSign(context.Context, address.Address, []byte) (*crypto.Signature, error) +} + +type ProverWinningPoSt interface { + GenerateWinningPoSt(ctx context.Context, ppt abi.RegisteredPoStProof, minerID abi.ActorID, sectorInfo []storiface.PostSectorChallenge, randomness abi.PoStRandomness) ([]prooftypes.PoStProof, error) +} + +func NewWinPostTask(max int, db *harmonydb.DB, prover ProverWinningPoSt, verifier storiface.Verifier, api WinPostAPI, actors []dtypes.MinerAddress) *WinPostTask { + t := &WinPostTask{ + max: max, + db: db, + prover: prover, + verifier: verifier, + api: api, + actors: actors, + } + // TODO: run warmup + + go t.mineBasic(context.TODO()) + + return t +} + +func (t *WinPostTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) { + log.Debugw("WinPostTask.Do()", "taskID", taskID) + + ctx := context.TODO() + + type BlockCID struct { + CID string + } + + type MiningTaskDetails struct { + SpID uint64 + Epoch uint64 + BlockCIDs []BlockCID + CompTime time.Time + } + + var details MiningTaskDetails + + // First query to fetch from mining_tasks + err = t.db.QueryRow(ctx, `SELECT sp_id, epoch, base_compute_time FROM mining_tasks WHERE task_id = $1`, taskID).Scan(&details.SpID, &details.Epoch, &details.CompTime) + if err != nil { + return false, err + } + + // Second query to fetch from mining_base_block + rows, err := t.db.Query(ctx, `SELECT block_cid FROM mining_base_block WHERE task_id = $1`, taskID) + if err != nil { + return false, err + } + defer rows.Close() + + for rows.Next() { + var cid BlockCID + if err := rows.Scan(&cid.CID); err != nil { + return false, err + } + details.BlockCIDs = append(details.BlockCIDs, cid) + } + + if err := rows.Err(); err != nil { + return false, err + } + + // construct base + maddr, err := address.NewIDAddress(details.SpID) + if err != nil { + return false, err + } + + var bcids []cid.Cid + for _, c := range details.BlockCIDs { + bcid, err := cid.Parse(c.CID) + if err != nil { + return false, err + } + bcids = append(bcids, bcid) + } + + tsk := types.NewTipSetKey(bcids...) 
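+ // load the tipset recorded for this task from the persisted block CIDs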
+ baseTs, err := t.api.ChainGetTipSet(ctx, tsk) + if err != nil { + return false, xerrors.Errorf("loading base tipset: %w", err) + } + + base := MiningBase{ + TipSet: baseTs, + AddRounds: abi.ChainEpoch(details.Epoch) - baseTs.Height() - 1, + ComputeTime: details.CompTime, + } + + persistNoWin := func() error { + _, err := t.db.Exec(ctx, `UPDATE mining_base_block SET no_win = true WHERE task_id = $1`, taskID) + if err != nil { + return xerrors.Errorf("marking base as not-won: %w", err) + } + + return nil + } + + // ensure we have a beacon entry for the epoch we're mining on + round := base.epoch() + + _ = retry1(func() (*types.BeaconEntry, error) { + return t.api.StateGetBeaconEntry(ctx, round) + }) + + // MAKE A MINING ATTEMPT!! + log.Debugw("attempting to mine a block", "tipset", types.LogCids(base.TipSet.Cids())) + + mbi, err := t.api.MinerGetBaseInfo(ctx, maddr, round, base.TipSet.Key()) + if err != nil { + return false, xerrors.Errorf("failed to get mining base info: %w", err) + } + if mbi == nil { + // not eligible to mine on this base, we're done here + log.Debugw("WinPoSt not eligible to mine on this base", "tipset", types.LogCids(base.TipSet.Cids())) + return true, persistNoWin() + } + + if !mbi.EligibleForMining { + // slashed or just have no power yet, we're done here + log.Debugw("WinPoSt not eligible for mining", "tipset", types.LogCids(base.TipSet.Cids())) + return true, persistNoWin() + } + + if len(mbi.Sectors) == 0 { + log.Warnw("WinPoSt no sectors to mine", "tipset", types.LogCids(base.TipSet.Cids())) + return false, xerrors.Errorf("no sectors selected for winning PoSt") + } + + var rbase types.BeaconEntry + var bvals []types.BeaconEntry + var eproof *types.ElectionProof + + // winner check + { + bvals = mbi.BeaconEntries + rbase = mbi.PrevBeaconEntry + if len(bvals) > 0 { + rbase = bvals[len(bvals)-1] + } + + eproof, err = gen.IsRoundWinner(ctx, round, maddr, rbase, mbi, t.api) + if err != nil { + log.Warnw("WinPoSt failed to check if we win next round", "error", err) + return false, xerrors.Errorf("failed to check if we win next round: %w", err) + } + + if eproof == nil { + // not a winner, we're done here + log.Debugw("WinPoSt not a winner", "tipset", types.LogCids(base.TipSet.Cids())) + return true, persistNoWin() + } + } + + // winning PoSt + var wpostProof []prooftypes.PoStProof + { + buf := new(bytes.Buffer) + if err := maddr.MarshalCBOR(buf); err != nil { + err = xerrors.Errorf("failed to marshal miner address: %w", err) + return false, err + } + + brand, err := lrand.DrawRandomnessFromBase(rbase.Data, crypto.DomainSeparationTag_WinningPoStChallengeSeed, round, buf.Bytes()) + if err != nil { + err = xerrors.Errorf("failed to get randomness for winning post: %w", err) + return false, err + } + + prand := abi.PoStRandomness(brand) + prand[31] &= 0x3f // make into fr + + sectorNums := make([]abi.SectorNumber, len(mbi.Sectors)) + for i, s := range mbi.Sectors { + sectorNums[i] = s.SectorNumber + } + + ppt, err := mbi.Sectors[0].SealProof.RegisteredWinningPoStProof() + if err != nil { + return false, xerrors.Errorf("mapping sector seal proof type to post proof type: %w", err) + } + + postChallenges, err := ffi.GeneratePoStFallbackSectorChallenges(ppt, abi.ActorID(details.SpID), prand, sectorNums) + if err != nil { + return false, xerrors.Errorf("generating election challenges: %v", err) + } + + sectorChallenges := make([]storiface.PostSectorChallenge, len(mbi.Sectors)) + for i, s := range mbi.Sectors { + sectorChallenges[i] = storiface.PostSectorChallenge{ + SealProof: 
s.SealProof, + SectorNumber: s.SectorNumber, + SealedCID: s.SealedCID, + Challenge: postChallenges.Challenges[s.SectorNumber], + Update: s.SectorKey != nil, + } + } + + wpostProof, err = t.prover.GenerateWinningPoSt(ctx, ppt, abi.ActorID(details.SpID), sectorChallenges, prand) + if err != nil { + err = xerrors.Errorf("failed to compute winning post proof: %w", err) + return false, err + } + } + + ticket, err := t.computeTicket(ctx, maddr, &rbase, round, base.TipSet.MinTicket(), mbi) + if err != nil { + return false, xerrors.Errorf("scratching ticket failed: %w", err) + } + + // get pending messages early, + msgs, err := t.api.MpoolSelect(ctx, base.TipSet.Key(), ticket.Quality()) + if err != nil { + return false, xerrors.Errorf("failed to select messages for block: %w", err) + } + + // equivocation handling + { + // This next block exists to "catch" equivocating miners, + // who submit 2 blocks at the same height at different times in order to split the network. + // To safeguard against this, we make sure it's been EquivocationDelaySecs since our base was calculated, + // then re-calculate it. + // If the daemon detected equivocated blocks, those blocks will no longer be in the new base. + time.Sleep(time.Until(base.ComputeTime.Add(time.Duration(build.EquivocationDelaySecs) * time.Second))) + + bestTs, err := t.api.ChainHead(ctx) + if err != nil { + return false, xerrors.Errorf("failed to get chain head: %w", err) + } + + headWeight, err := t.api.ChainTipSetWeight(ctx, bestTs.Key()) + if err != nil { + return false, xerrors.Errorf("failed to get chain head weight: %w", err) + } + + baseWeight, err := t.api.ChainTipSetWeight(ctx, base.TipSet.Key()) + if err != nil { + return false, xerrors.Errorf("failed to get base weight: %w", err) + } + if types.BigCmp(headWeight, baseWeight) <= 0 { + bestTs = base.TipSet + } + + // If the base has changed, we take the _intersection_ of our old base and new base, + // thus ejecting blocks from any equivocating miners, without taking any new blocks. 
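+ // (when the intersection differs from the original base, the ticket and message selection below are refreshed to match)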
+ if bestTs.Height() == base.TipSet.Height() && !bestTs.Equals(base.TipSet) { + log.Warnf("base changed from %s to %s, taking intersection", base.TipSet.Key(), bestTs.Key()) + newBaseMap := map[cid.Cid]struct{}{} + for _, newBaseBlk := range bestTs.Cids() { + newBaseMap[newBaseBlk] = struct{}{} + } + + refreshedBaseBlocks := make([]*types.BlockHeader, 0, len(base.TipSet.Cids())) + for _, baseBlk := range base.TipSet.Blocks() { + if _, ok := newBaseMap[baseBlk.Cid()]; ok { + refreshedBaseBlocks = append(refreshedBaseBlocks, baseBlk) + } + } + + if len(refreshedBaseBlocks) != 0 && len(refreshedBaseBlocks) != len(base.TipSet.Blocks()) { + refreshedBase, err := types.NewTipSet(refreshedBaseBlocks) + if err != nil { + return false, xerrors.Errorf("failed to create new tipset when refreshing: %w", err) + } + + if !base.TipSet.MinTicket().Equals(refreshedBase.MinTicket()) { + log.Warn("recomputing ticket due to base refresh") + + ticket, err = t.computeTicket(ctx, maddr, &rbase, round, refreshedBase.MinTicket(), mbi) + if err != nil { + return false, xerrors.Errorf("failed to refresh ticket: %w", err) + } + } + + log.Warn("re-selecting messages due to base refresh") + // refresh messages, as the selected messages may no longer be valid + msgs, err = t.api.MpoolSelect(ctx, refreshedBase.Key(), ticket.Quality()) + if err != nil { + return false, xerrors.Errorf("failed to re-select messages for block: %w", err) + } + + base.TipSet = refreshedBase + } + } + } + + // block construction + var blockMsg *types.BlockMsg + { + uts := base.TipSet.MinTimestamp() + build.BlockDelaySecs*(uint64(base.AddRounds)+1) + + blockMsg, err = t.api.MinerCreateBlock(context.TODO(), &api.BlockTemplate{ + Miner: maddr, + Parents: base.TipSet.Key(), + Ticket: ticket, + Eproof: eproof, + BeaconValues: bvals, + Messages: msgs, + Epoch: round, + Timestamp: uts, + WinningPoStProof: wpostProof, + }) + if err != nil { + return false, xerrors.Errorf("failed to create block: %w", err) + } + } + + // persist in db + { + bhjson, err := json.Marshal(blockMsg.Header) + if err != nil { + return false, xerrors.Errorf("failed to marshal block header: %w", err) + } + + _, err = t.db.Exec(ctx, `UPDATE mining_tasks + SET won = true, mined_cid = $2, mined_header = $3, mined_at = $4 + WHERE task_id = $1`, taskID, blockMsg.Header.Cid(), string(bhjson), time.Now().UTC()) + if err != nil { + return false, xerrors.Errorf("failed to update mining task: %w", err) + } + } + + // wait until block timestamp + { + time.Sleep(time.Until(time.Unix(int64(blockMsg.Header.Timestamp), 0))) + } + + // submit block!! 
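+ // (an error below returns the task as not-done so it can be retried)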
+ { + if err := t.api.SyncSubmitBlock(ctx, blockMsg); err != nil { + return false, xerrors.Errorf("failed to submit block: %w", err) + } + } + + log.Infow("mined a block", "tipset", types.LogCids(blockMsg.Header.Parents), "height", blockMsg.Header.Height, "miner", maddr, "cid", blockMsg.Header.Cid()) + + // persist that we've submitted the block + { + _, err = t.db.Exec(ctx, `UPDATE mining_tasks + SET submitted_at = $2 + WHERE task_id = $1`, taskID, time.Now().UTC()) + if err != nil { + return false, xerrors.Errorf("failed to update mining task: %w", err) + } + } + + return true, nil +} + +func (t *WinPostTask) CanAccept(ids []harmonytask.TaskID, engine *harmonytask.TaskEngine) (*harmonytask.TaskID, error) { + if len(ids) == 0 { + // probably can't happen, but panicking is bad + return nil, nil + } + + // select lowest epoch + var lowestEpoch abi.ChainEpoch + var lowestEpochID = ids[0] + for _, id := range ids { + var epoch uint64 + err := t.db.QueryRow(context.Background(), `SELECT epoch FROM mining_tasks WHERE task_id = $1`, id).Scan(&epoch) + if err != nil { + return nil, err + } + + if lowestEpoch == 0 || abi.ChainEpoch(epoch) < lowestEpoch { + lowestEpoch = abi.ChainEpoch(epoch) + lowestEpochID = id + } + } + + return &lowestEpochID, nil +} + +func (t *WinPostTask) TypeDetails() harmonytask.TaskTypeDetails { + return harmonytask.TaskTypeDetails{ + Name: "WinPost", + Max: t.max, + MaxFailures: 3, + Follows: nil, + Cost: resources.Resources{ + Cpu: 1, + + // todo set to something for 32/64G sector sizes? Technically windowPoSt is happy on a CPU + // but it will use a GPU if available + Gpu: 0, + + Ram: 1 << 30, // todo arbitrary number + }, + } +} + +func (t *WinPostTask) Adder(taskFunc harmonytask.AddTaskFunc) { + t.mineTF.Set(taskFunc) +} + +// MiningBase is the tipset on top of which we plan to construct our next block. +// Refer to godocs on GetBestMiningCandidate. +type MiningBase struct { + TipSet *types.TipSet + ComputeTime time.Time + AddRounds abi.ChainEpoch +} + +func (mb MiningBase) epoch() abi.ChainEpoch { + // return the epoch that will result from mining on this base + return mb.TipSet.Height() + mb.AddRounds + 1 +} + +func (mb MiningBase) baseTime() time.Time { + tsTime := time.Unix(int64(mb.TipSet.MinTimestamp()), 0) + roundDelay := build.BlockDelaySecs * uint64(mb.AddRounds+1) + tsTime = tsTime.Add(time.Duration(roundDelay) * time.Second) + return tsTime +} + +func (mb MiningBase) afterPropDelay() time.Time { + return mb.baseTime().Add(randTimeOffset(time.Second)) +} + +func (t *WinPostTask) mineBasic(ctx context.Context) { + var workBase MiningBase + + taskFn := t.mineTF.Val(ctx) + + // initialize workbase + { + head := retry1(func() (*types.TipSet, error) { + return t.api.ChainHead(ctx) + }) + + workBase = MiningBase{ + TipSet: head, + AddRounds: 0, + ComputeTime: time.Now(), + } + } + + /* + + /- T+0 == workBase.baseTime + | + >--------*------*--------[wait until next round]-----> + | + |- T+PD == workBase.afterPropDelay+(~1s) + |- Here we acquire the new workBase, and start a new round task + \- Then we loop around, and wait for the next head + + time --> + */ + + for { + // limit the rate at which we mine blocks to at least EquivocationDelaySecs + // this is to prevent races on devnets in catch up mode. Acts as a minimum + // delay for the sleep below. 
+ time.Sleep(time.Duration(build.EquivocationDelaySecs)*time.Second + time.Second) + + // wait for *NEXT* propagation delay + time.Sleep(time.Until(workBase.afterPropDelay())) + + // check current best candidate + maybeBase := retry1(func() (*types.TipSet, error) { + return t.api.ChainHead(ctx) + }) + + if workBase.TipSet.Equals(maybeBase) { + // workbase didn't change in the new round so we have a null round here + workBase.AddRounds++ + log.Debugw("workbase update", "tipset", workBase.TipSet.Cids(), "nulls", workBase.AddRounds, "lastUpdate", time.Since(workBase.ComputeTime), "type", "same-tipset") + } else { + btsw := retry1(func() (types.BigInt, error) { + return t.api.ChainTipSetWeight(ctx, maybeBase.Key()) + }) + + ltsw := retry1(func() (types.BigInt, error) { + return t.api.ChainTipSetWeight(ctx, workBase.TipSet.Key()) + }) + + if types.BigCmp(btsw, ltsw) <= 0 { + // new tipset for some reason has less weight than the old one, assume null round here + // NOTE: the backing node may have reorged, or manually changed head + workBase.AddRounds++ + log.Debugw("workbase update", "tipset", workBase.TipSet.Cids(), "nulls", workBase.AddRounds, "lastUpdate", time.Since(workBase.ComputeTime), "type", "prefer-local-weight") + } else { + // new tipset has more weight, so we should mine on it, no null round here + log.Debugw("workbase update", "tipset", workBase.TipSet.Cids(), "nulls", workBase.AddRounds, "lastUpdate", time.Since(workBase.ComputeTime), "type", "prefer-new-tipset") + + workBase = MiningBase{ + TipSet: maybeBase, + AddRounds: 0, + ComputeTime: time.Now(), + } + } + } + + // dispatch mining task + // (note equivocation prevention is handled by the mining code) + + baseEpoch := workBase.TipSet.Height() + + for _, act := range t.actors { + spID, err := address.IDFromAddress(address.Address(act)) + if err != nil { + log.Errorf("failed to get spID from address %s: %s", act, err) + continue + } + + taskFn(func(id harmonytask.TaskID, tx *harmonydb.Tx) (shouldCommit bool, seriousError error) { + // First we check if the mining base includes blocks we may have mined previously to avoid getting slashed + // select mining_tasks where epoch==base_epoch if win=true to maybe get base block cid which has to be included in our tipset + var baseBlockCid string + err := tx.QueryRow(`SELECT mined_cid FROM mining_tasks WHERE epoch = $1 AND sp_id = $2 AND won = true`, baseEpoch, spID).Scan(&baseBlockCid) + if err != nil && !errors.Is(err, sql.ErrNoRows) { + return false, xerrors.Errorf("querying mining_tasks: %w", err) + } + if baseBlockCid != "" { + c, err := cid.Parse(baseBlockCid) + if err != nil { + return false, xerrors.Errorf("parsing mined_cid: %w", err) + } + + // we have mined in the previous round, make sure that our block is included in the tipset + // if it's not we risk getting slashed + + var foundOurs bool + for _, c2 := range workBase.TipSet.Cids() { + if c == c2 { + foundOurs = true + break + } + } + if !foundOurs { + log.Errorw("our block was not included in the tipset, aborting", "tipset", workBase.TipSet.Cids(), "ourBlock", c) + return false, xerrors.Errorf("our block was not included in the tipset, aborting") + } + } + + _, err = tx.Exec(`INSERT INTO mining_tasks (task_id, sp_id, epoch, base_compute_time) VALUES ($1, $2, $3, $4)`, id, spID, workBase.epoch(), workBase.ComputeTime.UTC()) + if err != nil { + return false, xerrors.Errorf("inserting mining_tasks: %w", err) + } + + for _, c := range workBase.TipSet.Cids() { + _, err = tx.Exec(`INSERT INTO mining_base_block (task_id, 
sp_id, block_cid) VALUES ($1, $2, $3)`, id, spID, c) + if err != nil { + return false, xerrors.Errorf("inserting mining base blocks: %w", err) + } + } + + return true, nil // no errors, commit the transaction + }) + } + } +} + +func (t *WinPostTask) computeTicket(ctx context.Context, maddr address.Address, brand *types.BeaconEntry, round abi.ChainEpoch, chainRand *types.Ticket, mbi *api.MiningBaseInfo) (*types.Ticket, error) { + buf := new(bytes.Buffer) + if err := maddr.MarshalCBOR(buf); err != nil { + return nil, xerrors.Errorf("failed to marshal address to cbor: %w", err) + } + + if round > build.UpgradeSmokeHeight { + buf.Write(chainRand.VRFProof) + } + + input, err := lrand.DrawRandomnessFromBase(brand.Data, crypto.DomainSeparationTag_TicketProduction, round-build.TicketRandomnessLookback, buf.Bytes()) + if err != nil { + return nil, err + } + + vrfOut, err := gen.ComputeVRF(ctx, t.api.WalletSign, mbi.WorkerKey, input) + if err != nil { + return nil, err + } + + return &types.Ticket{ + VRFProof: vrfOut, + }, nil +} + +func randTimeOffset(width time.Duration) time.Duration { + buf := make([]byte, 8) + rand.Reader.Read(buf) //nolint:errcheck + val := time.Duration(binary.BigEndian.Uint64(buf) % uint64(width)) + + return val - (width / 2) +} + +func retry1[R any](f func() (R, error)) R { + for { + r, err := f() + if err == nil { + return r + } + + log.Errorw("error in mining loop, retrying", "error", err) + time.Sleep(time.Second) + } +} + +var _ harmonytask.TaskInterface = &WinPostTask{} diff --git a/scripts/lotus-provider.service b/scripts/lotus-provider.service new file mode 100644 index 000000000..ddec181ba --- /dev/null +++ b/scripts/lotus-provider.service @@ -0,0 +1,12 @@ +[Unit] +Description=Lotus Provider +After=network.target +After=lotus-daemon.service + +[Service] +ExecStart=/usr/local/bin/lotus-provider run +Environment=GOLOG_FILE="/var/log/lotus/provider.log" +Environment=GOLOG_LOG_FMT="json" +LimitNOFILE=1000000 +[Install] +WantedBy=multi-user.target diff --git a/storage/paths/db_index.go b/storage/paths/db_index.go new file mode 100644 index 000000000..1e4abfab1 --- /dev/null +++ b/storage/paths/db_index.go @@ -0,0 +1,1001 @@ +package paths + +import ( + "context" + "database/sql" + "errors" + "fmt" + "net/url" + gopath "path" + "strings" + "time" + + "github.com/google/uuid" + "go.opencensus.io/stats" + "go.opencensus.io/tag" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/journal/alerting" + "github.com/filecoin-project/lotus/lib/harmony/harmonydb" + "github.com/filecoin-project/lotus/metrics" + "github.com/filecoin-project/lotus/storage/sealer/fsutil" + "github.com/filecoin-project/lotus/storage/sealer/storiface" +) + +var errAlreadyLocked = errors.New("already locked") + +type DBIndex struct { + alerting *alerting.Alerting + pathAlerts map[storiface.ID]alerting.AlertType + + harmonyDB *harmonydb.DB +} + +func NewDBIndex(al *alerting.Alerting, db *harmonydb.DB) *DBIndex { + return &DBIndex{ + harmonyDB: db, + + alerting: al, + pathAlerts: map[storiface.ID]alerting.AlertType{}, + } +} + +func (dbi *DBIndex) StorageList(ctx context.Context) (map[storiface.ID][]storiface.Decl, error) { + + var sectorEntries []struct { + StorageId string + MinerId sql.NullInt64 + SectorNum sql.NullInt64 + SectorFiletype sql.NullInt32 `db:"sector_filetype"` + IsPrimary sql.NullBool + } + + err := dbi.harmonyDB.Select(ctx, §orEntries, + "SELECT stor.storage_id, miner_id, sector_num, sector_filetype, is_primary FROM 
storage_path stor LEFT JOIN sector_location sec on stor.storage_id=sec.storage_id") + if err != nil { + return nil, xerrors.Errorf("StorageList DB query fails: %v", err) + } + + byID := map[storiface.ID]map[abi.SectorID]storiface.SectorFileType{} + for _, entry := range sectorEntries { + id := storiface.ID(entry.StorageId) + _, ok := byID[id] + if !ok { + byID[id] = map[abi.SectorID]storiface.SectorFileType{} + } + + // skip sector info for storage paths with no sectors + if !entry.MinerId.Valid { + continue + } + + sectorId := abi.SectorID{ + Miner: abi.ActorID(entry.MinerId.Int64), + Number: abi.SectorNumber(entry.SectorNum.Int64), + } + + byID[id][sectorId] |= storiface.SectorFileType(entry.SectorFiletype.Int32) + } + + out := map[storiface.ID][]storiface.Decl{} + for id, m := range byID { + out[id] = []storiface.Decl{} + for sectorID, fileType := range m { + out[id] = append(out[id], storiface.Decl{ + SectorID: sectorID, + SectorFileType: fileType, + }) + } + } + + return out, nil +} + +func union(a, b []string) []string { + m := make(map[string]bool) + + for _, elem := range a { + m[elem] = true + } + + for _, elem := range b { + if _, ok := m[elem]; !ok { + a = append(a, elem) + } + } + return a +} + +func splitString(str string) []string { + if str == "" { + return []string{} + } + return strings.Split(str, ",") +} + +func (dbi *DBIndex) StorageAttach(ctx context.Context, si storiface.StorageInfo, st fsutil.FsStat) error { + var allow, deny = make([]string, 0, len(si.AllowTypes)), make([]string, 0, len(si.DenyTypes)) + + if _, hasAlert := dbi.pathAlerts[si.ID]; dbi.alerting != nil && !hasAlert { + dbi.pathAlerts[si.ID] = dbi.alerting.AddAlertType("sector-index", "pathconf-"+string(si.ID)) + } + + var hasConfigIssues bool + + for id, typ := range si.AllowTypes { + _, err := storiface.TypeFromString(typ) + if err != nil { + //No need to hard-fail here, just warn the user + //(note that even with all-invalid entries we'll deny all types, so nothing unexpected should enter the path) + hasConfigIssues = true + + if dbi.alerting != nil { + dbi.alerting.Raise(dbi.pathAlerts[si.ID], map[string]interface{}{ + "message": "bad path type in AllowTypes", + "path": string(si.ID), + "idx": id, + "path_type": typ, + "error": err.Error(), + }) + } + + continue + } + allow = append(allow, typ) + } + for id, typ := range si.DenyTypes { + _, err := storiface.TypeFromString(typ) + if err != nil { + //No need to hard-fail here, just warn the user + hasConfigIssues = true + + if dbi.alerting != nil { + dbi.alerting.Raise(dbi.pathAlerts[si.ID], map[string]interface{}{ + "message": "bad path type in DenyTypes", + "path": string(si.ID), + "idx": id, + "path_type": typ, + "error": err.Error(), + }) + } + + continue + } + deny = append(deny, typ) + } + si.AllowTypes = allow + si.DenyTypes = deny + + if dbi.alerting != nil && !hasConfigIssues && dbi.alerting.IsRaised(dbi.pathAlerts[si.ID]) { + dbi.alerting.Resolve(dbi.pathAlerts[si.ID], map[string]string{ + "message": "path config is now correct", + }) + } + + for _, u := range si.URLs { + if _, err := url.Parse(u); err != nil { + return xerrors.Errorf("failed to parse url %s: %w", si.URLs, err) + } + } + + // Single transaction to attach storage which is not present in the DB + _, err := dbi.harmonyDB.BeginTransaction(ctx, func(tx *harmonydb.Tx) (commit bool, err error) { + + var urls sql.NullString + var storageId sql.NullString + err = dbi.harmonyDB.QueryRow(ctx, + "Select storage_id, urls FROM storage_path WHERE storage_id = $1", 
string(si.ID)).Scan(&storageId, &urls) + if err != nil && !strings.Contains(err.Error(), "no rows in result set") { + return false, xerrors.Errorf("storage attach select fails: %v", err) + } + + // Storage ID entry exists + // TODO: Consider using insert into .. on conflict do update set ... below + if storageId.Valid { + var currUrls []string + if urls.Valid { + currUrls = strings.Split(urls.String, ",") + } + currUrls = union(currUrls, si.URLs) + + _, err = dbi.harmonyDB.Exec(ctx, + "UPDATE storage_path set urls=$1, weight=$2, max_storage=$3, can_seal=$4, can_store=$5, groups=$6, allow_to=$7, allow_types=$8, deny_types=$9 WHERE storage_id=$10", + strings.Join(currUrls, ","), + si.Weight, + si.MaxStorage, + si.CanSeal, + si.CanStore, + strings.Join(si.Groups, ","), + strings.Join(si.AllowTo, ","), + strings.Join(si.AllowTypes, ","), + strings.Join(si.DenyTypes, ","), + si.ID) + if err != nil { + return false, xerrors.Errorf("storage attach UPDATE fails: %v", err) + } + + return true, nil + } + + // Insert storage id + _, err = dbi.harmonyDB.Exec(ctx, + "INSERT INTO storage_path "+ + "Values($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)", + si.ID, + strings.Join(si.URLs, ","), + si.Weight, + si.MaxStorage, + si.CanSeal, + si.CanStore, + strings.Join(si.Groups, ","), + strings.Join(si.AllowTo, ","), + strings.Join(si.AllowTypes, ","), + strings.Join(si.DenyTypes, ","), + st.Capacity, + st.Available, + st.FSAvailable, + st.Reserved, + st.Used, + time.Now()) + if err != nil { + return false, xerrors.Errorf("StorageAttach insert fails: %v", err) + } + return true, nil + }) + if err != nil { + return err + } + + return nil +} + +func (dbi *DBIndex) StorageDetach(ctx context.Context, id storiface.ID, url string) error { + + // If url not in path urls, error out + // if this is only path url for this storage path, drop storage path and sector decls which have this as a storage path + + var qUrls string + err := dbi.harmonyDB.QueryRow(ctx, "SELECT COALESCE(urls,'') FROM storage_path WHERE storage_id=$1", string(id)).Scan(&qUrls) + if err != nil { + return err + } + urls := splitString(qUrls) + + var modUrls []string + for _, u := range urls { + if u != url { + modUrls = append(modUrls, u) + } + } + + // noop if url doesn't exist in urls + if len(modUrls) == len(urls) { + return nil + } + + if len(modUrls) > 0 { + newUrls := strings.Join(modUrls, ",") + _, err := dbi.harmonyDB.Exec(ctx, "UPDATE storage_path set urls=$1 WHERE storage_id=$2", newUrls, id) + if err != nil { + return err + } + + log.Warnw("Dropping sector path endpoint", "path", id, "url", url) + } else { + // Single transaction to drop storage path and sector decls which have this as a storage path + _, err := dbi.harmonyDB.BeginTransaction(ctx, func(tx *harmonydb.Tx) (commit bool, err error) { + // Drop storage path completely + _, err = dbi.harmonyDB.Exec(ctx, "DELETE FROM storage_path WHERE storage_id=$1", id) + if err != nil { + return false, err + } + + // Drop all sectors entries which use this storage path + _, err = dbi.harmonyDB.Exec(ctx, "DELETE FROM sector_location WHERE storage_id=$1", id) + if err != nil { + return false, err + } + return true, nil + }) + if err != nil { + return err + } + log.Warnw("Dropping sector storage", "path", id) + } + + return nil +} + +func (dbi *DBIndex) StorageReportHealth(ctx context.Context, id storiface.ID, report storiface.HealthReport) error { + + var canSeal, canStore bool + err := dbi.harmonyDB.QueryRow(ctx, + "SELECT can_seal, can_store FROM storage_path WHERE 
storage_id=$1", id).Scan(&canSeal, &canStore) + if err != nil { + return xerrors.Errorf("Querying for storage id %s fails with err %v", id, err) + } + + _, err = dbi.harmonyDB.Exec(ctx, + "UPDATE storage_path set capacity=$1, available=$2, fs_available=$3, reserved=$4, used=$5, last_heartbeat=$6", + report.Stat.Capacity, + report.Stat.Available, + report.Stat.FSAvailable, + report.Stat.Reserved, + report.Stat.Used, + time.Now()) + if err != nil { + return xerrors.Errorf("updating storage health in DB fails with err: %v", err) + } + + if report.Stat.Capacity > 0 { + ctx, _ = tag.New(ctx, + tag.Upsert(metrics.StorageID, string(id)), + tag.Upsert(metrics.PathStorage, fmt.Sprint(canStore)), + tag.Upsert(metrics.PathSeal, fmt.Sprint(canSeal)), + ) + + stats.Record(ctx, metrics.StorageFSAvailable.M(float64(report.Stat.FSAvailable)/float64(report.Stat.Capacity))) + stats.Record(ctx, metrics.StorageAvailable.M(float64(report.Stat.Available)/float64(report.Stat.Capacity))) + stats.Record(ctx, metrics.StorageReserved.M(float64(report.Stat.Reserved)/float64(report.Stat.Capacity))) + + stats.Record(ctx, metrics.StorageCapacityBytes.M(report.Stat.Capacity)) + stats.Record(ctx, metrics.StorageFSAvailableBytes.M(report.Stat.FSAvailable)) + stats.Record(ctx, metrics.StorageAvailableBytes.M(report.Stat.Available)) + stats.Record(ctx, metrics.StorageReservedBytes.M(report.Stat.Reserved)) + + if report.Stat.Max > 0 { + stats.Record(ctx, metrics.StorageLimitUsed.M(float64(report.Stat.Used)/float64(report.Stat.Max))) + stats.Record(ctx, metrics.StorageLimitUsedBytes.M(report.Stat.Used)) + stats.Record(ctx, metrics.StorageLimitMaxBytes.M(report.Stat.Max)) + } + } + + return nil +} + +// function to check if a filetype is valid +func (dbi *DBIndex) checkFileType(fileType storiface.SectorFileType) bool { + ftValid := false + for _, fileTypeValid := range storiface.PathTypes { + if fileTypeValid&fileType == 0 { + ftValid = true + break + } + } + return ftValid +} + +func (dbi *DBIndex) StorageDeclareSector(ctx context.Context, storageID storiface.ID, s abi.SectorID, ft storiface.SectorFileType, primary bool) error { + + if !dbi.checkFileType(ft) { + return xerrors.Errorf("invalid filetype") + } + + _, err := dbi.harmonyDB.BeginTransaction(ctx, func(tx *harmonydb.Tx) (commit bool, err error) { + var currPrimary sql.NullBool + err = dbi.harmonyDB.QueryRow(ctx, + "SELECT is_primary FROM sector_location WHERE miner_id=$1 and sector_num=$2 and sector_filetype=$3 and storage_id=$4", + uint64(s.Miner), uint64(s.Number), int(ft), string(storageID)).Scan(&currPrimary) + if err != nil && !strings.Contains(err.Error(), "no rows in result set") { + return false, xerrors.Errorf("DB SELECT fails: %v", err) + } + + // If storage id already exists for this sector, update primary if need be + if currPrimary.Valid { + if !currPrimary.Bool && primary { + _, err = dbi.harmonyDB.Exec(ctx, + "UPDATE sector_location set is_primary = TRUE WHERE miner_id=$1 and sector_num=$2 and sector_filetype=$3 and storage_id=$4", + s.Miner, s.Number, ft, storageID) + if err != nil { + return false, xerrors.Errorf("DB update fails: %v", err) + } + } else { + log.Warnf("sector %v redeclared in %s", s, storageID) + } + } else { + _, err = dbi.harmonyDB.Exec(ctx, + "INSERT INTO sector_location "+ + "values($1, $2, $3, $4, $5)", + s.Miner, s.Number, ft, storageID, primary) + if err != nil { + return false, xerrors.Errorf("DB insert fails: %v", err) + } + } + + return true, nil + }) + if err != nil { + return err + } + + return nil +} + +func (dbi *DBIndex) 
StorageDropSector(ctx context.Context, storageID storiface.ID, s abi.SectorID, ft storiface.SectorFileType) error { + + if !dbi.checkFileType(ft) { + return xerrors.Errorf("invalid filetype") + } + + _, err := dbi.harmonyDB.Exec(ctx, + "DELETE FROM sector_location WHERE miner_id=$1 and sector_num=$2 and sector_filetype=$3 and storage_id=$4", + int(s.Miner), int(s.Number), int(ft), string(storageID)) + if err != nil { + return xerrors.Errorf("StorageDropSector DELETE query fails: %v", err) + } + + return nil +} + +func (dbi *DBIndex) StorageFindSector(ctx context.Context, s abi.SectorID, ft storiface.SectorFileType, ssize abi.SectorSize, allowFetch bool) ([]storiface.SectorStorageInfo, error) { + + var result []storiface.SectorStorageInfo + + allowList := make(map[string]struct{}) + storageWithSector := map[string]bool{} + + type dbRes struct { + StorageId string + Count uint64 + IsPrimary bool + Urls string + Weight uint64 + CanSeal bool + CanStore bool + Groups string + AllowTo string + AllowTypes string + DenyTypes string + } + + var rows []dbRes + + fts := ft.AllSet() + // Find all storage info which already hold this sector + filetype + err := dbi.harmonyDB.Select(ctx, &rows, + ` SELECT DISTINCT ON (stor.storage_id) + stor.storage_id, + COUNT(*) OVER(PARTITION BY stor.storage_id) as count, + BOOL_OR(is_primary) OVER(PARTITION BY stor.storage_id) AS is_primary, + urls, + weight, + can_seal, + can_store, + groups, + allow_to, + allow_types, + deny_types + FROM sector_location sec + JOIN storage_path stor ON sec.storage_id = stor.storage_id + WHERE sec.miner_id = $1 + AND sec.sector_num = $2 + AND sec.sector_filetype = ANY($3) + ORDER BY stor.storage_id`, + s.Miner, s.Number, fts) + if err != nil { + return nil, xerrors.Errorf("Finding sector storage from DB fails with err: %v", err) + } + + for _, row := range rows { + + // Parse all urls + var urls, burls []string + for _, u := range splitString(row.Urls) { + rl, err := url.Parse(u) + if err != nil { + return nil, xerrors.Errorf("failed to parse url: %w", err) + } + rl.Path = gopath.Join(rl.Path, ft.String(), storiface.SectorName(s)) + urls = append(urls, rl.String()) + burls = append(burls, u) + } + + result = append(result, storiface.SectorStorageInfo{ + ID: storiface.ID(row.StorageId), + URLs: urls, + BaseURLs: burls, + Weight: row.Weight * row.Count, + CanSeal: row.CanSeal, + CanStore: row.CanStore, + Primary: row.IsPrimary, + AllowTypes: splitString(row.AllowTypes), + DenyTypes: splitString(row.DenyTypes), + }) + + storageWithSector[row.StorageId] = true + + allowTo := splitString(row.AllowTo) + if allowList != nil && len(allowTo) > 0 { + for _, group := range allowTo { + allowList[group] = struct{}{} + } + } else { + allowList = nil // allow to any + } + } + + // Find all storage paths which can hold this sector if allowFetch is true + if allowFetch { + spaceReq, err := ft.SealSpaceUse(ssize) + if err != nil { + return nil, xerrors.Errorf("estimating required space: %w", err) + } + + // Conditions to satisfy when choosing a sector + // 1. CanSeal is true + // 2. Available >= spaceReq + // 3. curr_time - last_heartbeat < SkippedHeartbeatThresh + // 4. heartbeat_err is NULL + // 5. not one of the earlier picked storage ids + // 6. !ft.AnyAllowed(st.info.AllowTypes, st.info.DenyTypes) + // 7. 
Storage path is part of the groups which are allowed from the storage paths which already hold the sector + + var rows []struct { + StorageId string + Urls string + Weight uint64 + CanSeal bool + CanStore bool + Groups string + AllowTypes string + DenyTypes string + } + err = dbi.harmonyDB.Select(ctx, &rows, + `SELECT storage_id, + urls, + weight, + can_seal, + can_store, + groups, + allow_types, + deny_types + FROM storage_path + WHERE can_seal=true + and available >= $1 + and NOW()-last_heartbeat < $2 + and heartbeat_err is null`, + spaceReq, SkippedHeartbeatThresh) + if err != nil { + return nil, xerrors.Errorf("Selecting allowfetch storage paths from DB fails err: %v", err) + } + + for _, row := range rows { + if ok := storageWithSector[row.StorageId]; ok { + continue + } + + if !ft.AnyAllowed(splitString(row.AllowTypes), splitString(row.DenyTypes)) { + log.Debugf("not selecting on %s, not allowed by file type filters", row.StorageId) + continue + } + + if allowList != nil { + groups := splitString(row.Groups) + allow := false + for _, group := range groups { + if _, found := allowList[group]; found { + log.Debugf("path %s in allowed group %s", row.StorageId, group) + allow = true + break + } + } + + if !allow { + log.Debugf("not selecting on %s, not in allowed group, allow %+v; path has %+v", row.StorageId, allowList, groups) + continue + } + } + + var urls, burls []string + for _, u := range splitString(row.Urls) { + rl, err := url.Parse(u) + if err != nil { + return nil, xerrors.Errorf("failed to parse url: %w", err) + } + rl.Path = gopath.Join(rl.Path, ft.String(), storiface.SectorName(s)) + urls = append(urls, rl.String()) + burls = append(burls, u) + } + + result = append(result, storiface.SectorStorageInfo{ + ID: storiface.ID(row.StorageId), + URLs: urls, + BaseURLs: burls, + Weight: row.Weight * 0, + CanSeal: row.CanSeal, + CanStore: row.CanStore, + Primary: false, + AllowTypes: splitString(row.AllowTypes), + DenyTypes: splitString(row.DenyTypes), + }) + } + } + + return result, nil +} + +func (dbi *DBIndex) StorageInfo(ctx context.Context, id storiface.ID) (storiface.StorageInfo, error) { + + var qResults []struct { + Urls string + Weight uint64 + MaxStorage uint64 + CanSeal bool + CanStore bool + Groups string + AllowTo string + AllowTypes string + DenyTypes string + } + + err := dbi.harmonyDB.Select(ctx, &qResults, + "SELECT urls, weight, max_storage, can_seal, can_store, groups, allow_to, allow_types, deny_types "+ + "FROM storage_path WHERE storage_id=$1", string(id)) + if err != nil { + return storiface.StorageInfo{}, xerrors.Errorf("StorageInfo query fails: %v", err) + } + + var sinfo storiface.StorageInfo + sinfo.ID = id + sinfo.URLs = splitString(qResults[0].Urls) + sinfo.Weight = qResults[0].Weight + sinfo.MaxStorage = qResults[0].MaxStorage + sinfo.CanSeal = qResults[0].CanSeal + sinfo.CanStore = qResults[0].CanStore + sinfo.Groups = splitString(qResults[0].Groups) + sinfo.AllowTo = splitString(qResults[0].AllowTo) + sinfo.AllowTypes = splitString(qResults[0].AllowTypes) + sinfo.DenyTypes = splitString(qResults[0].DenyTypes) + + return sinfo, nil +} + +func (dbi *DBIndex) StorageBestAlloc(ctx context.Context, allocate storiface.SectorFileType, ssize abi.SectorSize, pathType storiface.PathType) ([]storiface.StorageInfo, error) { + var err error + var spaceReq uint64 + switch pathType { + case storiface.PathSealing: + spaceReq, err = allocate.SealSpaceUse(ssize) + case storiface.PathStorage: + spaceReq, err = allocate.StoreSpaceUse(ssize) + default: + return nil, 
xerrors.Errorf("unexpected path type") + } + if err != nil { + return nil, xerrors.Errorf("estimating required space: %w", err) + } + + var rows []struct { + StorageId string + Urls string + Weight uint64 + MaxStorage uint64 + CanSeal bool + CanStore bool + Groups string + AllowTo string + AllowTypes string + DenyTypes string + } + + err = dbi.harmonyDB.Select(ctx, &rows, + `SELECT storage_id, + urls, + weight, + max_storage, + can_seal, + can_store, + groups, + allow_to, + allow_types, + deny_types + FROM storage_path + WHERE available >= $1 + and NOW()-last_heartbeat < $2 + and heartbeat_err is null + and ($3 and can_seal = TRUE or $4 and can_store = TRUE) + order by (available::numeric * weight) desc`, + spaceReq, + SkippedHeartbeatThresh, + pathType == storiface.PathSealing, + pathType == storiface.PathStorage, + ) + if err != nil { + return nil, xerrors.Errorf("Querying for best storage sectors fails with err %w: ", err) + } + + var result []storiface.StorageInfo + for _, row := range rows { + result = append(result, storiface.StorageInfo{ + ID: storiface.ID(row.StorageId), + URLs: splitString(row.Urls), + Weight: row.Weight, + MaxStorage: row.MaxStorage, + CanSeal: row.CanSeal, + CanStore: row.CanStore, + Groups: splitString(row.Groups), + AllowTo: splitString(row.AllowTo), + AllowTypes: splitString(row.AllowTypes), + DenyTypes: splitString(row.DenyTypes), + }) + } + + return result, nil +} + +// timeout after which we consider a lock to be stale +const LockTimeOut = 300 * time.Second + +func isLocked(ts sql.NullTime) bool { + return ts.Valid && ts.Time.After(time.Now().Add(-LockTimeOut)) +} + +func (dbi *DBIndex) lock(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType, lockUuid uuid.UUID) (bool, error) { + if read|write == 0 { + return false, nil + } + + if read|write > (1< (1< 0 { + if len(skipped) > 0 && !allowSkip { // This should happen rarely because before entering GenerateWindowPoSt we check all sectors by reading challenges. // When it does happen, window post runner logic will just re-check sectors, and retry with newly-discovered-bad sectors skipped log.Errorf("couldn't read some challenges (skipped %d)", len(skipped)) @@ -802,6 +807,7 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) { if err != nil { log.Errorf("getting gpu devices failed: %+v", err) } + log.Infow("Detected GPU devices.", "count", len(gpus)) memPhysical, memUsed, memSwap, memSwapUsed, err := l.memInfo() if err != nil { diff --git a/storage/wdpost/wdpost_changehandler.go b/storage/wdpost/wdpost_changehandler.go index 5f4b0ca0c..ce58f1489 100644 --- a/storage/wdpost/wdpost_changehandler.go +++ b/storage/wdpost/wdpost_changehandler.go @@ -20,9 +20,9 @@ const ( type CompleteGeneratePoSTCb func(posts []miner.SubmitWindowedPoStParams, err error) type CompleteSubmitPoSTCb func(err error) -// wdPoStCommands is the subset of the WindowPoStScheduler + full node APIs used +// WdPoStCommands is the subset of the WindowPoStScheduler + full node APIs used // by the changeHandler to execute actions and query state. 
-type wdPoStCommands interface { +type WdPoStCommands interface { StateMinerProvingDeadline(context.Context, address.Address, types.TipSetKey) (*dline.Info, error) startGeneratePoST(ctx context.Context, ts *types.TipSet, deadline *dline.Info, onComplete CompleteGeneratePoSTCb) context.CancelFunc @@ -32,13 +32,13 @@ type wdPoStCommands interface { } type changeHandler struct { - api wdPoStCommands + api WdPoStCommands actor address.Address proveHdlr *proveHandler submitHdlr *submitHandler } -func newChangeHandler(api wdPoStCommands, actor address.Address) *changeHandler { +func newChangeHandler(api WdPoStCommands, actor address.Address) *changeHandler { posts := newPostsCache() p := newProver(api, posts) s := newSubmitter(api, posts) @@ -148,7 +148,7 @@ type postResult struct { // proveHandler generates proofs type proveHandler struct { - api wdPoStCommands + api WdPoStCommands posts *postsCache postResults chan *postResult @@ -165,7 +165,7 @@ type proveHandler struct { } func newProver( - api wdPoStCommands, + api WdPoStCommands, posts *postsCache, ) *proveHandler { ctx, cancel := context.WithCancel(context.Background()) @@ -230,7 +230,7 @@ func (p *proveHandler) processHeadChange(ctx context.Context, newTS *types.TipSe // next post window _, complete := p.posts.get(di) for complete { - di = nextDeadline(di) + di = NextDeadline(di) _, complete = p.posts.get(di) } @@ -297,7 +297,7 @@ type postInfo struct { // submitHandler submits proofs on-chain type submitHandler struct { - api wdPoStCommands + api WdPoStCommands posts *postsCache submitResults chan *submitResult @@ -321,7 +321,7 @@ type submitHandler struct { } func newSubmitter( - api wdPoStCommands, + api WdPoStCommands, posts *postsCache, ) *submitHandler { ctx, cancel := context.WithCancel(context.Background()) @@ -525,8 +525,8 @@ func (s *submitHandler) getPostWindow(di *dline.Info) *postWindow { return <-out } -// nextDeadline gets deadline info for the subsequent deadline -func nextDeadline(currentDeadline *dline.Info) *dline.Info { +// NextDeadline gets deadline info for the subsequent deadline +func NextDeadline(currentDeadline *dline.Info) *dline.Info { periodStart := currentDeadline.PeriodStart newDeadline := currentDeadline.Index + 1 if newDeadline == miner.WPoStPeriodDeadlines { diff --git a/storage/wdpost/wdpost_changehandler_test.go b/storage/wdpost/wdpost_changehandler_test.go index dac6c4558..44d0dfe6d 100644 --- a/storage/wdpost/wdpost_changehandler_test.go +++ b/storage/wdpost/wdpost_changehandler_test.go @@ -441,7 +441,7 @@ func TestChangeHandlerStartProvingNextDeadline(t *testing.T) { // Trigger head change that advances the chain to the Challenge epoch for // the next deadline go func() { - di = nextDeadline(di) + di = NextDeadline(di) currentEpoch = di.Challenge + ChallengeConfidence triggerHeadAdvance(t, s, currentEpoch) }() @@ -474,7 +474,7 @@ func TestChangeHandlerProvingRounds(t *testing.T) { <-s.ch.proveHdlr.processedHeadChanges completeProofEpoch := di.Open + completeProofIndex - next := nextDeadline(di) + next := NextDeadline(di) //fmt.Println("epoch", currentEpoch, s.mock.getPostStatus(di), "next", s.mock.getPostStatus(next)) if currentEpoch >= next.Challenge { require.Equal(t, postStatusComplete, s.mock.getPostStatus(di)) @@ -962,7 +962,7 @@ func TestChangeHandlerSubmitRevertTwoEpochs(t *testing.T) { require.Equal(t, postStatusComplete, s.mock.getPostStatus(diE1)) // Move to the challenge epoch for the next deadline - diE2 := nextDeadline(diE1) + diE2 := NextDeadline(diE1) currentEpoch = diE2.Challenge + 
ChallengeConfidence go triggerHeadAdvance(t, s, currentEpoch) @@ -1067,7 +1067,7 @@ func TestChangeHandlerSubmitRevertAdvanceLess(t *testing.T) { require.Equal(t, postStatusComplete, s.mock.getPostStatus(diE1)) // Move to the challenge epoch for the next deadline - diE2 := nextDeadline(diE1) + diE2 := NextDeadline(diE1) currentEpoch = diE2.Challenge + ChallengeConfidence go triggerHeadAdvance(t, s, currentEpoch) diff --git a/storage/wdpost/wdpost_nextdl_test.go b/storage/wdpost/wdpost_nextdl_test.go index d591c1e88..ef140de30 100644 --- a/storage/wdpost/wdpost_nextdl_test.go +++ b/storage/wdpost/wdpost_nextdl_test.go @@ -24,7 +24,7 @@ func TestNextDeadline(t *testing.T) { for i := 1; i < 1+int(minertypes.WPoStPeriodDeadlines)*2; i++ { //stm: @WDPOST_NEXT_DEADLINE_001 - di = nextDeadline(di) + di = NextDeadline(di) deadlineIdx = i % int(minertypes.WPoStPeriodDeadlines) expPeriodStart := int(minertypes.WPoStProvingPeriod) * (i / int(minertypes.WPoStPeriodDeadlines)) expOpen := expPeriodStart + deadlineIdx*int(minertypes.WPoStChallengeWindow) diff --git a/storage/wdpost/wdpost_run.go b/storage/wdpost/wdpost_run.go index e8a59f629..edc088ccf 100644 --- a/storage/wdpost/wdpost_run.go +++ b/storage/wdpost/wdpost_run.go @@ -313,6 +313,7 @@ func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, manual bool, di // allowed in a single message partitionBatches, err := s.BatchPartitions(partitions, nv) if err != nil { + log.Errorf("batch partitions failed: %+v", err) return nil, err } diff --git a/storage/wdpost/wdpost_sched.go b/storage/wdpost/wdpost_sched.go index aaa7361b4..bbf4596fe 100644 --- a/storage/wdpost/wdpost_sched.go +++ b/storage/wdpost/wdpost_sched.go @@ -23,6 +23,7 @@ import ( "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/journal" "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/node/modules/dtypes" "github.com/filecoin-project/lotus/storage/ctladdr" "github.com/filecoin-project/lotus/storage/sealer" "github.com/filecoin-project/lotus/storage/sealer/storiface" @@ -33,6 +34,7 @@ var log = logging.Logger("wdpost") type NodeAPI interface { ChainHead(context.Context) (*types.TipSet, error) ChainNotify(context.Context) (<-chan []*api.HeadChange, error) + ChainGetTipSet(context.Context, types.TipSetKey) (*types.TipSet, error) StateMinerInfo(context.Context, address.Address, types.TipSetKey) (api.MinerInfo, error) StateMinerProvingDeadline(context.Context, address.Address, types.TipSetKey) (*dline.Info, error) @@ -86,6 +88,11 @@ type WindowPoStScheduler struct { // failLk sync.Mutex } +type ActorInfo struct { + address.Address + api.MinerInfo +} + // NewWindowedPoStScheduler creates a new WindowPoStScheduler scheduler. 
func NewWindowedPoStScheduler(api NodeAPI, cfg config.MinerFeeConfig, @@ -95,12 +102,18 @@ func NewWindowedPoStScheduler(api NodeAPI, verif storiface.Verifier, ft sealer.FaultTracker, j journal.Journal, - actor address.Address) (*WindowPoStScheduler, error) { - mi, err := api.StateMinerInfo(context.TODO(), actor, types.EmptyTSK) - if err != nil { - return nil, xerrors.Errorf("getting sector size: %w", err) + actors []dtypes.MinerAddress) (*WindowPoStScheduler, error) { + var actorInfos []ActorInfo + + for _, actor := range actors { + mi, err := api.StateMinerInfo(context.TODO(), address.Address(actor), types.EmptyTSK) + if err != nil { + return nil, xerrors.Errorf("getting sector size: %w", err) + } + actorInfos = append(actorInfos, ActorInfo{address.Address(actor), mi}) } + // TODO I punted here knowing that actorInfos will be consumed differently later. return &WindowPoStScheduler{ api: api, feeCfg: cfg, @@ -108,13 +121,13 @@ func NewWindowedPoStScheduler(api NodeAPI, prover: sp, verifier: verif, faultTracker: ft, - proofType: mi.WindowPoStProofType, - partitionSectors: mi.WindowPoStPartitionSectors, + proofType: actorInfos[0].WindowPoStProofType, + partitionSectors: actorInfos[0].WindowPoStPartitionSectors, + actor: address.Address(actors[0]), disablePreChecks: pcfg.DisableWDPoStPreChecks, maxPartitionsPerPostMessage: pcfg.MaxPartitionsPerPoStMessage, maxPartitionsPerRecoveryMessage: pcfg.MaxPartitionsPerRecoveryMessage, singleRecoveringPartitionPerPostMessage: pcfg.SingleRecoveringPartitionPerPostMessage, - actor: actor, evtTypes: [...]journal.EventType{ evtTypeWdPoStScheduler: j.RegisterEventType("wdpost", "scheduler"), evtTypeWdPoStProofs: j.RegisterEventType("wdpost", "proofs_processed"), @@ -126,8 +139,6 @@ func NewWindowedPoStScheduler(api NodeAPI, } func (s *WindowPoStScheduler) Run(ctx context.Context) { - // Initialize change handler. - // callbacks is a union of the fullNodeFilteredAPI and ourselves. callbacks := struct { NodeAPI