diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index a97d1f64..00000000 --- a/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM golang:alpine as builder -RUN apk --update --no-cache add make git g++ - -# Build statically linked vDB binary (wonky path because of Dep) -RUN mkdir -p /go/src/github.com/vulcanize/ipfs-blockchain-watcher -ADD . /go/src/github.com/vulcanize/ipfs-blockchain-watcher -WORKDIR /go/src/github.com/vulcanize/ipfs-blockchain-watcher -RUN GCO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' . - -# Build migration tool -RUN go get -u -d github.com/pressly/goose/cmd/goose -WORKDIR /go/src/github.com/pressly/goose/cmd/goose -RUN GCO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -tags='no_mysql no_sqlite' -o goose - -# Second stage -FROM alpine -COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/vulcanizedb /app/vulcanizedb -COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/environments/staging.toml /app/environments/ -COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/dockerfiles/startup_script.sh /app/ -COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/db/migrations/* /app/ -COPY --from=builder /go/src/github.com/pressly/goose/cmd/goose/goose /app/goose - -WORKDIR /app -CMD ["./startup_script.sh"] diff --git a/Makefile b/Makefile index fc031472..9848db9b 100644 --- a/Makefile +++ b/Makefile @@ -105,7 +105,7 @@ rollback: $(GOOSE) checkdbvars pg_dump -O -s $(CONNECT_STRING) > db/schema.sql -## Rollbackt to a select migration (id/timestamp) +## Rollback to a select migration (id/timestamp) .PHONY: rollback_to rollback_to: $(GOOSE) checkmigration checkdbvars $(GOOSE) -dir db/migrations postgres "$(CONNECT_STRING)" down-to "$(MIGRATION)" @@ -137,26 +137,3 @@ version_migrations: import: test -n "$(NAME)" # $$NAME psql $(NAME) < db/schema.sql - - -# Docker actions -## Rinkeby 
docker environment -RINKEBY_COMPOSE_FILE=dockerfiles/rinkeby/docker-compose.yml - -.PHONY: rinkeby_env_up -rinkeby_env_up: - docker-compose -f $(RINKEBY_COMPOSE_FILE) up -d geth - docker-compose -f $(RINKEBY_COMPOSE_FILE) up --build migrations - docker-compose -f $(RINKEBY_COMPOSE_FILE) up -d --build vulcanizedb - -.PHONY: rinkeby_env_deploy -rinkeby_env_deploy: - docker-compose -f $(RINKEBY_COMPOSE_FILE) up -d --build vulcanizedb - -.PHONY: dev_env_migrate -rinkeby_env_migrate: - docker-compose -f $(RINKEBY_COMPOSE_FILE) up --build migrations - -.PHONY: rinkeby_env_down -rinkeby_env_down: - docker-compose -f $(RINKEBY_COMPOSE_FILE) down diff --git a/README.md b/README.md index 844e44fc..2748c8e6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Go Report Card](https://goreportcard.com/badge/github.com/vulcanize/ipfs-blockchain-watcher)](https://goreportcard.com/report/github.com/vulcanize/ipfs-blockchain-watcher) -> Tool for extracting and indexing blockchain data on PG-IPFS +> ipfs-blockchain-watcher is used to extract, transform, and load all eth or btc data into an IPFS-backing Postgres datastore while generating useful secondary indexes around the data in other Postgres tables ## Table of Contents 1. [Background](#background) @@ -22,12 +22,16 @@ Currently the service supports complete processing of all Bitcoin and Ethereum d More details on the design of ipfs-blockchain-watcher can be found in [here](./documentation/architecture.md) ## Install -1. [Postgres](#postgres) 1. [Goose](#goose) +1. [Postgres](#postgres) 1. [IPFS](#ipfs) 1. [Blockchain](#blockchain) 1. [Watcher](#watcher) +### Goose +[goose](https://github.com/pressly/goose) is used for migration management. While it is not necessary to use `goose` for manual setup, it +is required for running the automated tests and is used by the `make migrate` command. + ### Postgres 1. [Install Postgres](https://wiki.postgresql.org/wiki/Detailed_installation_guides) 1. 
Create a superuser for yourself and make sure `psql --list` works without prompting for a password. @@ -46,75 +50,30 @@ localhost. To allow access on Ubuntu, set localhost connections via hostname, ip (It should be noted that trusted auth should only be enabled on systems without sensitive data in them: development and local test databases) -### Goose -We use [goose](https://github.com/pressly/goose) as our migration management tool. While it is not necessary to use `goose` for manual setup, it -is required for running the automated tests. - ### IPFS -We use IPFS to store IPLD objects for each type of data we extract from on chain. +Data is stored in an [IPFS-backing Postgres datastore](https://github.com/ipfs/go-ds-sql). +By default data is written directly to the ipfs blockstore in Postgres; the public.blocks table. +In this case no further IPFS configuration is needed at this time. -To start, download and install [IPFS](https://github.com/vulcanize/go-ipfs): +Optionally, ipfs-blockchain-watcher can be configured to function through an internal ipfs node interface using the flag: `-ipfs-mode=interface`. +Operating through the ipfs interface provides the option to configure a block exchange that can search remotely for IPLD data found missing in the local datastore. +This option is irrelevant in most cases and this mode has some disadvantages, namely: -`go get github.com/ipfs/go-ipfs` +1. Environment must have IPFS configured +1. Process will contend with the lockfile at `$IPFS_PATH` +1. Publishing and indexing of data must occur in separate db transactions -`cd $GOPATH/src/github.com/ipfs/go-ipfs` - -`make install` - -If we want to use Postgres as our backing datastore, we need to use the vulcanize fork of go-ipfs. 
- -Start by adding the fork and switching over to it: - -`git remote add vulcanize https://github.com/vulcanize/go-ipfs.git` - -`git fetch vulcanize` - -`git checkout -b postgres_update vulcanize/postgres_update` - -Now install this fork of ipfs, first be sure to remove any previous installation: - -`make install` - -Check that is installed properly by running: - -`ipfs` - -You should see the CLI info/help output. - -And now we initialize with the `postgresds` profile. -If ipfs was previously initialized we will need to remove the old profile first. -We also need to provide env variables for the postgres connection: - -We can either set these manually, e.g. -```bash -export IPFS_PGHOST= -export IPFS_PGUSER= -export IPFS_PGDATABASE= -export IPFS_PGPORT= -export IPFS_PGPASSWORD= -``` - -And then run the ipfs command: - -`ipfs init --profile=postgresds` - -Or we can use the pre-made script at `GOPATH/src/github.com/ipfs/go-ipfs/misc/utility/ipfs_postgres.sh` -which has usage: - -`./ipfs_postgres.sh "` - -and will ask us to enter the password, avoiding storing it to an ENV variable. - -Once we have initialized ipfs, that is all we need to do with it- we do not need to run a daemon during the subsequent processes. +More information for configuring Postgres-IPFS can be found [here](./documentation/ipfs.md) ### Blockchain This section describes how to setup an Ethereum or Bitcoin node to serve as a data source for ipfs-blockchain-watcher #### Ethereum -For Ethereum, we currently *require* [a special fork of go-ethereum](https://github.com/vulcanize/go-ethereum/tree/statediff_at_anyblock-1.9.11). This can be setup as follows. -Skip this steps if you already have access to a node that displays the statediffing endpoints. +For Ethereum, [a special fork of go-ethereum](https://github.com/vulcanize/go-ethereum/tree/statediff_at_anyblock-1.9.11) is currently *required*. +This can be setup as follows. 
+Skip this step if you already have access to a node that displays the statediffing endpoints. -Begin by downloading geth and switching to the vulcanize/rpc_statediffing branch: +Begin by downloading geth and switching to the statediffing branch: `go get github.com/ethereum/go-ethereum` @@ -134,33 +93,34 @@ And run the output binary with statediffing turned on: `cd $GOPATH/src/github.com/ethereum/go-ethereum/build/bin` -`./geth --statediff --statediff.streamblock --ws --syncmode=full` +`./geth --syncmode=full --statediff --ws` -Note: if you wish to access historical data (perform `backFill`) then the node will need to operate as an archival node (`--gcmode=archive`) +Note: to access historical data (perform `backFill`) the node will need to operate as an archival node (`--gcmode=archive`) with rpc endpoints +exposed (`--rpc --rpcapi=eth,statediff,net`) -Note: other CLI options- statediff specific ones included- can be explored with `./geth help` +Warning: There is a good chance even a fully synced archive node has incomplete historical state data to some degree The output from geth should mention that it is `Starting statediff service` and block synchronization should begin shortly thereafter. Note that until it receives a subscriber, the statediffing process does nothing but wait for one. Once a subscription is received, this -will be indicated in the output and node will begin processing and sending statediffs. +will be indicated in the output and the node will begin processing and sending statediffs. -Also in the output will be the endpoints that we will use to interface with the node. +Also in the output will be the endpoints that will be used to interface with the node. The default ws url is "127.0.0.1:8546" and the default http url is "127.0.0.1:8545". These values will be used as the `ethereum.wsPath` and `ethereum.httpPath` in the config, respectively. 
#### Bitcoin For Bitcoin, ipfs-blockchain-watcher is able to operate entirely through the universally exposed JSON-RPC interfaces. -This means we can use any of the standard full nodes (e.g. bitcoind, btcd) as our data source. +This means any of the standard full nodes can be used (e.g. bitcoind, btcd) as the data source. Point at a remote node or set one up locally using the instructions for [bitcoind](https://github.com/bitcoin/bitcoin) and [btcd](https://github.com/btcsuite/btcd). The default http url is "127.0.0.1:8332". We will use the http endpoint as both the `bitcoin.wsPath` and `bitcoin.httpPath` -(bitcoind does not support websocket endpoints, we are currently using a "subscription" wrapper around the http endpoints) +(bitcoind does not support websocket endpoints, the watcher currently uses a "subscription" wrapper around the http endpoints) ### Watcher -Finally, we can setup the watcher process itself. +Finally, setup the watcher process itself. -Start by downloading vulcanizedb and moving into the repo: +Start by downloading ipfs-blockchain-watcher and moving into the repo: `go get github.com/vulcanize/ipfs-blockchain-watcher` @@ -237,7 +197,7 @@ For Ethereum: ``` ### Exposing the data -We can expose a number of different APIs for remote access to ipfs-blockchain-watcher data, these are dicussed in more detail [here](./documentation/apis.md) +A number of different APIs for remote access to ipfs-blockchain-watcher data can be exposed, these are discussed in more detail [here](./documentation/apis.md) ### Testing `make test` will run the unit tests diff --git a/cmd/watch.go b/cmd/watch.go index 942c58a5..b6e29e03 100644 --- a/cmd/watch.go +++ b/cmd/watch.go @@ -138,6 +138,7 @@ func init() { // flags for all config variables watchCmd.PersistentFlags().String("ipfs-path", "", "ipfs repository path") + watchCmd.PersistentFlags().String("ipfs-mode", "", "ipfs operation mode") watchCmd.PersistentFlags().String("watcher-chain", "", "which chain to support, 
options are currently Ethereum or Bitcoin.") watchCmd.PersistentFlags().Bool("watcher-server", false, "turn vdb server on or off") @@ -171,6 +172,7 @@ func init() { // and their bindings viper.BindPFlag("ipfs.path", watchCmd.PersistentFlags().Lookup("ipfs-path")) + viper.BindPFlag("ipfs.mode", watchCmd.PersistentFlags().Lookup("ipfs-mode")) viper.BindPFlag("watcher.chain", watchCmd.PersistentFlags().Lookup("watcher-chain")) viper.BindPFlag("watcher.server", watchCmd.PersistentFlags().Lookup("watcher-server")) diff --git a/dockerfiles/README.md b/dockerfiles/README.md deleted file mode 100644 index b2fbf2bb..00000000 --- a/dockerfiles/README.md +++ /dev/null @@ -1,32 +0,0 @@ -S -`Dockerfile` will build an alpine image containing: -- vDB as a binary with runtime deps statically linked: `/app/vulcanizedb` -- The migration tool goose: `/app/goose` -- Two services for running `headerSync` and `continuousLogSync`, started with the default configuration `environments/staging.toml`. - -By default, vDB is configured towards the Kovan deploy. The configuration values can be overridden using environment variables, using the same hierarchical naming pattern but in CAPS and using underscores. For example, the contract address for the `Pit` can be set with the variable `CONTRACT_ADDRESS_PIT="0x123..."`. - -## To use the container: -1. Setup a postgres database with superuser `vulcanize` -2. Set the env variables `DATABASE_NAME`, `DATABASE_HOSTNAME`, - `DATABASE_PORT`, `DATABASE_USER` & `DATABASE_PASSWORD` -3. Run the DB migrations: - * `./goose postgres "postgresql://$(DATABASE_USER):$(DATABASE_PASSWORD)@$(DATABASE_HOSTNAME):$(DATABASE_PORT)/$(DATABASE_NAME)?sslmode=disable" -e` -4. Set `CLIENT_IPCPATH` to a node endpoint -5. 
Set the contract variables: - * `CONTRACT_ADDRESS_[CONTRACT NAME]=0x123...` - * `CONTRACT_ABI_[CONTRACT NAME]="ABI STRING"` - * `CONTRACT_DEPLOYMENT-BLOCK_[CONTRACT NAME]=0` (doesn't really matter on a short chain, just avoids long unnecessary searching) -6. Start the `headerSync` and `continuousLogSync` services: - * `./vulcanizedb headerSync --config environments/staging.toml` - * `./vulcanizedb continuousLogSync --config environments/staging.toml` - -### Automated -The steps above have been rolled into a script: `/app/startup_script.sh`, which just assumes the DB env variables have been set, and defaults the rest to Kovan according to `environments/staging.toml`. This can be called with something like: - -`docker run -d -e DATABASE_NAME=vulcanize_public -e DATABASE_HOSTNAME=localhost -e DATABASE_PORT=5432 -e DATABASE_USER=vulcanize -e DATABASE_PASSWORD=vulcanize m0ar/images:vDB` - -### Logging -When running, vDB services log to `/app/vulcanizedb.log`. - diff --git a/dockerfiles/migrations/Dockerfile b/dockerfiles/migrations/Dockerfile index 220f5580..dbcea6d0 100644 --- a/dockerfiles/migrations/Dockerfile +++ b/dockerfiles/migrations/Dockerfile @@ -7,7 +7,7 @@ RUN apk add busybox-extras # this is probably a noob move, but I want apk from alpine for the above but need to avoid Go 1.13 below as this error still occurs https://github.com/ipfs/go-ipfs/issues/6603 FROM golang:1.12.4 as builder -# Get and build vulcanizedb +# Get and build ipfs-blockchain-watcher ADD . 
/go/src/github.com/vulcanize/ipfs-blockchain-watcher # Build migration tool diff --git a/dockerfiles/startup_script.sh b/dockerfiles/startup_script.sh deleted file mode 100755 index de74e711..00000000 --- a/dockerfiles/startup_script.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh -# Runs the migrations and starts the headerSync and continuousLogSync services - -# Exit if the variable tests fail -set -e - -# Check the database variables are set -test $DATABASE_NAME -test $DATABASE_HOSTNAME -test $DATABASE_PORT -test $DATABASE_USER -test $DATABASE_PASSWORD - -# Construct the connection string for postgres -CONNECT_STRING=postgresql://$DATABASE_USER:$DATABASE_PASSWORD@$DATABASE_HOSTNAME:$DATABASE_PORT/$DATABASE_NAME?sslmode=disable -echo "Connecting with: $CONNECT_STRING" - -set +e - -# Run the DB migrations -./goose postgres "$CONNECT_STRING" up -if [ $? -eq 0 ]; then - # Fire up the services - ./vulcanizedb headerSync --config environments/staging.toml & - ./vulcanizedb continuousLogSync --config environments/staging.toml & -else - echo "Could not run migrations. Are the database details correct?" -fi -wait diff --git a/dockerfiles/super_node/Dockerfile b/dockerfiles/super_node/Dockerfile index 69679db1..ef6a3cf7 100644 --- a/dockerfiles/super_node/Dockerfile +++ b/dockerfiles/super_node/Dockerfile @@ -4,10 +4,10 @@ RUN apk --update --no-cache add make git g++ linux-headers # DEBUG RUN apk add busybox-extras -# Get and build vulcanizedb +# Get and build ipfs-blockchain-watcher ADD . /go/src/github.com/vulcanize/ipfs-blockchain-watcher WORKDIR /go/src/github.com/vulcanize/ipfs-blockchain-watcher -RUN GO111MODULE=on GCO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o vulcanizedb . +RUN GO111MODULE=on GCO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o ipfs-blockchain-watcher . 
# Build migration tool RUN go get -u -d github.com/pressly/goose/cmd/goose @@ -37,7 +37,7 @@ COPY --chown=5000:5000 --from=builder /go/src/github.com/vulcanize/ipfs-blockcha # keep binaries immutable -COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/vulcanizedb vulcanizedb +COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/ipfs-blockchain-watcher ipfs-blockchain-watcher COPY --from=builder /go/src/github.com/pressly/goose/cmd/goose/goose goose COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/db/migrations migrations/vulcanizedb COPY --from=builder /go/src/github.com/vulcanize/ipfs-blockchain-watcher/environments environments diff --git a/dockerfiles/super_node/entrypoint.sh b/dockerfiles/super_node/entrypoint.sh index 1af5a9cb..7df52bc5 100755 --- a/dockerfiles/super_node/entrypoint.sh +++ b/dockerfiles/super_node/entrypoint.sh @@ -36,7 +36,7 @@ echo "Beginning the vulcanizedb process" VDB_CONFIG_FILE=${VDB_CONFIG_FILE:-config.toml} DEFAULT_OPTIONS="--config=$VDB_CONFIG_FILE" VDB_FULL_CL=${VDB_FULL_CL:-$VDB_COMMAND $DEFAULT_OPTIONS} -echo running: ./vulcanizedb $VDB_FULL_CL $@ +echo running: ./ipfs-blockchain-watcher $VDB_FULL_CL $@ case "$1" in "/bin/sh" ) @@ -50,8 +50,8 @@ if [[ -z "$vdb_args" ]]; then vdb_args="--config=config.toml" fi -echo running: ./vulcanizedb $vdb_args -./vulcanizedb $vdb_args +echo running: ./ipfs-blockchain-watcher $vdb_args +./ipfs-blockchain-watcher $vdb_args rv=$? if [ $rv != 0 ]; then diff --git a/dockerfiles/super_node/startup_script.sh b/dockerfiles/super_node/startup_script.sh index e30c5069..89039372 100755 --- a/dockerfiles/super_node/startup_script.sh +++ b/dockerfiles/super_node/startup_script.sh @@ -50,7 +50,7 @@ fi # If IPFS initialization was successful if [[ $? 
-eq 0 ]]; then echo "Running the VulcanizeDB process" - ./vulcanizedb ${VDB_COMMAND} --config=config.toml + ./ipfs-blockchain-watcher ${VDB_COMMAND} --config=config.toml else echo "Could not initialize IPFS." exit 1 diff --git a/documentation/apis.md b/documentation/apis.md index 7fdebe09..81184b89 100644 --- a/documentation/apis.md +++ b/documentation/apis.md @@ -101,18 +101,18 @@ The .toml file being used to fill the Ethereum subscription config would look so These configuration parameters are broken down as follows: -`ethSubscription.wsPath` is used to define the SuperNode ws url OR ipc endpoint we subscribe to +`ethSubscription.wsPath` is used to define the watcher ws url OR ipc endpoint to subscribe to `ethSubscription.historicalData` specifies whether or not ipfs-blockchain-watcher should look up historical data in its cache and -send that to the subscriber, if this is set to `false` then we only streams newly synced/incoming data +send that to the subscriber, if this is set to `false` then only newly synced/incoming data is streamed `ethSubscription.historicalDataOnly` will tell ipfs-blockchain-watcher to only send historical data with the specified range and not stream forward syncing data -`ethSubscription.startingBlock` is the starting block number for the range we want to receive data in +`ethSubscription.startingBlock` is the starting block number for the range to receive data in -`ethSubscription.endingBlock` is the ending block number for the range we want to receive data in; -setting to 0 means there is no end/we will continue streaming indefinitely. +`ethSubscription.endingBlock` is the ending block number for the range to receive data in; +setting to 0 means the process will continue streaming indefinitely. `ethSubscription.headerFilter` has two sub-options: `off` and `uncles`. @@ -122,34 +122,34 @@ setting to 0 means there is no end/we will continue streaming indefinitely. `ethSubscription.txFilter` has three sub-options: `off`, `src`, and `dst`. 
- Setting `off` to true tells ipfs-blockchain-watcher to not send any transactions to the subscriber -- `src` and `dst` are string arrays which can be filled with ETH addresses we want to filter transactions for, +- `src` and `dst` are string arrays which can be filled with ETH addresses to filter transactions for, if they have any addresses then ipfs-blockchain-watcher will only send transactions that were sent or received by the addresses contained in `src` and `dst`, respectively. `ethSubscription.receiptFilter` has four sub-options: `off`, `topics`, `contracts` and `matchTxs`. - Setting `off` to true tells ipfs-blockchain-watcher to not send any receipts to the subscriber -- `topic0s` is a string array which can be filled with event topics we want to filter for, +- `topic0s` is a string array which can be filled with event topics to filter for, if it has any topics then ipfs-blockchain-watcher will only send receipts that contain logs which have that topic0. -- `contracts` is a string array which can be filled with contract addresses we want to filter for, if it contains any contract addresses the watcher will +- `contracts` is a string array which can be filled with contract addresses to filter for, if it contains any contract addresses the watcher will only send receipts that correspond to one of those contracts. - `matchTrxs` is a bool which when set to true any receipts that correspond to filtered for transactions will be sent by the watcher, regardless of whether or not the receipt satisfies the `topics` or `contracts` filters. `ethSubscription.stateFilter` has three sub-options: `off`, `addresses`, and `intermediateNodes`. 
- Setting `off` to true tells ipfs-blockchain-watcher to not send any state data to the subscriber -- `addresses` is a string array which can be filled with ETH addresses we want to filter state for, +- `addresses` is a string array which can be filled with ETH addresses to filter state for, if it has any addresses then ipfs-blockchain-watcher will only send state leafs (accounts) corresponding to those account addresses. -- By default ipfs-blockchain-watcher only sends along state leafs, if we want to receive branch and extension nodes as well `intermediateNodes` can be set to `true`. +- By default ipfs-blockchain-watcher only sends along state leafs, to receive branch and extension nodes as well `intermediateNodes` can be set to `true`. `ethSubscription.storageFilter` has four sub-options: `off`, `addresses`, `storageKeys`, and `intermediateNodes`. - Setting `off` to true tells ipfs-blockchain-watcher to not send any storage data to the subscriber -- `addresses` is a string array which can be filled with ETH addresses we want to filter storage for, +- `addresses` is a string array which can be filled with ETH addresses to filter storage for, if it has any addresses then ipfs-blockchain-watcher will only send storage nodes from the storage tries at those state addresses. -- `storageKeys` is another string array that can be filled with storage keys we want to filter storage data for. It is important to note that the storage keys need to be the actual keccak256 hashes, whereas +- `storageKeys` is another string array that can be filled with storage keys to filter storage data for. It is important to note that the storage keys need to be the actual keccak256 hashes, whereas the addresses in the `addresses` fields are pre-hashed ETH addresses. -- By default ipfs-blockchain-watcher only sends along storage leafs, if we want to receive branch and extension nodes as well `intermediateNodes` can be set to `true`. 
+- By default ipfs-blockchain-watcher only sends along storage leafs, to receive branch and extension nodes as well `intermediateNodes` can be set to `true`. ### Bitcoin RPC Subscription: An example of how to subscribe to a real-time Bitcoin data feed from ipfs-blockchain-watcher using the `Stream` RPC method is provided below @@ -210,7 +210,7 @@ The .toml file being used to fill the Bitcoin subscription config would look som These configuration parameters are broken down as follows: -`btcSubscription.wsPath` is used to define the SuperNode ws url OR ipc endpoint we subscribe to +`btcSubscription.wsPath` is used to define the SuperNode ws url OR ipc endpoint to subscribe to `btcSubscription.historicalData` specifies whether or not ipfs-blockchain-watcher should look up historical data in its cache and send that to the subscriber, if this is set to `false` then ipfs-blockchain-watcher only streams newly synced/incoming data @@ -218,10 +218,10 @@ send that to the subscriber, if this is set to `false` then ipfs-blockchain-watc `btcSubscription.historicalDataOnly` will tell ipfs-blockchain-watcher to only send historical data with the specified range and not stream forward syncing data -`btcSubscription.startingBlock` is the starting block number for the range we want to receive data in +`btcSubscription.startingBlock` is the starting block number for the range to receive data in -`btcSubscription.endingBlock` is the ending block number for the range we want to receive data in; -setting to 0 means there is no end/we will continue streaming indefinitely. +`btcSubscription.endingBlock` is the ending block number for the range to receive data in; +setting to 0 means the process will continue streaming indefinitely. `btcSubscription.headerFilter` has one sub-option: `off`. 
diff --git a/documentation/architecture.md b/documentation/architecture.md index 86f33c6e..f312bb0d 100644 --- a/documentation/architecture.md +++ b/documentation/architecture.md @@ -10,15 +10,15 @@ ## Processes ipfs-blockchain-watcher is a [service](../pkg/watch/service.go#L61) comprised of the following interfaces: -* [Payload Fetcher](../pkg/shared/interfaces.go#L29): Fetches raw chain data from a half-duplex endpoint (HTTP/IPC), used for historical data fetching. ([BTC](../../pkg/btc/payload_fetcher.go), [ETH](../../pkg/eth/payload_fetcher.go)). -* [Payload Streamer](../pkg/shared/interfaces.go#L24): Streams raw chain data from a full-duplex endpoint (WebSocket/IPC), used for syncing data at the head of the chain in real-time. ([BTC](../../pkg/btc/http_streamer.go), [ETH](../../pkg/eth/streamer.go)). -* [Payload Converter](../pkg/shared/interfaces.go#L34): Converters raw chain data to an intermediary form prepared for IPFS publishing. ([BTC](../../pkg/btc/converter.go), [ETH](../../pkg/eth/converter.go)). -* [IPLD Publisher](../pkg/shared/interfaces.go#L39): Publishes the converted data to IPFS, returning their CIDs and associated metadata for indexing. ([BTC](../../pkg/btc/publisher.go), [ETH](../../pkg/eth/publisher.go)). -* [CID Indexer](../pkg/shared/interfaces.go#L44): Indexes CIDs in Postgres with their associated metadata. This metadata is chain specific and selected based on utility. ([BTC](../../pkg/btc/indexer.go), [ETH](../../pkg/eth/indexer.go)). -* [CID Retriever](../pkg/shared/interfaces.go#L54): Retrieves CIDs from Postgres by searching against their associated metadata, is used to lookup data to serve API requests/subscriptions. ([BTC](../../pkg/btc/retriever.go), [ETH](../../pkg/eth/retriever.go)). -* [IPLD Fetcher](../pkg/shared/interfaces.go#L62): Fetches the IPLDs needed to service API requests/subscriptions from IPFS using retrieved CIDS; can route through a IPFS block-exchange to search for objects that are not directly available. 
([BTC](../../pkg/btc/ipld_fetcher.go), [ETH](../../pkg/eth/ipld_fetcher.go)) -* [Response Filterer](../pkg/shared/interfaces.go#L49): Filters converted data payloads served to API subscriptions; filters according to the subscriber provided parameters. ([BTC](../../pkg/btc/filterer.go), [ETH](../../pkg/eth/filterer.go)). -* [API](https://github.com/ethereum/go-ethereum/blob/master/rpc/types.go#L31): Expose RPC methods for clients to interface with the data. Chain-specific APIs should aim to recapitulate as much of the native API as possible. ([VDB](../../pkg/api.go), [ETH](../../pkg/eth/api.go)). +* [Payload Fetcher](../pkg/shared/interfaces.go#L29): Fetches raw chain data from a half-duplex endpoint (HTTP/IPC), used for historical data fetching. ([BTC](../pkg/btc/payload_fetcher.go), [ETH](../pkg/eth/payload_fetcher.go)). +* [Payload Streamer](../pkg/shared/interfaces.go#L24): Streams raw chain data from a full-duplex endpoint (WebSocket/IPC), used for syncing data at the head of the chain in real-time. ([BTC](../pkg/btc/http_streamer.go), [ETH](../pkg/eth/streamer.go)). +* [Payload Converter](../pkg/shared/interfaces.go#L34): Converters raw chain data to an intermediary form prepared for IPFS publishing. ([BTC](../pkg/btc/converter.go), [ETH](../pkg/eth/converter.go)). +* [IPLD Publisher](../pkg/shared/interfaces.go#L39): Publishes the converted data to IPFS, returning their CIDs and associated metadata for indexing. ([BTC](../pkg/btc/publisher.go), [ETH](../pkg/eth/publisher.go)). +* [CID Indexer](../pkg/shared/interfaces.go#L44): Indexes CIDs in Postgres with their associated metadata. This metadata is chain specific and selected based on utility. ([BTC](../pkg/btc/indexer.go), [ETH](../pkg/eth/indexer.go)). +* [CID Retriever](../pkg/shared/interfaces.go#L54): Retrieves CIDs from Postgres by searching against their associated metadata, is used to lookup data to serve API requests/subscriptions. ([BTC](../pkg/btc/retriever.go), [ETH](../pkg/eth/retriever.go)). 
+* [IPLD Fetcher](../pkg/shared/interfaces.go#L62): Fetches the IPLDs needed to service API requests/subscriptions from IPFS using retrieved CIDS; can route through a IPFS block-exchange to search for objects that are not directly available. ([BTC](../pkg/btc/ipld_fetcher.go), [ETH](../pkg/eth/ipld_fetcher.go)) +* [Response Filterer](../pkg/shared/interfaces.go#L49): Filters converted data payloads served to API subscriptions; filters according to the subscriber provided parameters. ([BTC](../pkg/btc/filterer.go), [ETH](../pkg/eth/filterer.go)). +* [API](https://github.com/ethereum/go-ethereum/blob/master/rpc/types.go#L31): Expose RPC methods for clients to interface with the data. Chain-specific APIs should aim to recapitulate as much of the native API as possible. ([VDB](../pkg/api.go), [ETH](../pkg/eth/api.go)). Appropriating the service for a new chain is done by creating underlying types to satisfy these interfaces for @@ -106,7 +106,7 @@ Currently, ipfs-blockchain-watcher persists all data to a single Postgres databa Chain-specific data is populated under a chain-specific schema (e.g. `eth` and `btc`) while shared data- such as the IPFS blocks table- is populated under the `public` schema. Subsequent watchers which act on the raw chain data should build and populate their own schemas or separate databases entirely. -In the future, we will be moving to a foreign table based architecture wherein a single db is used for shared data while each watcher uses +In the future, the database architecture will be moving to a foreign table based architecture wherein a single db is used for shared data while each watcher uses its own database and accesses and acts on the shared data through foreign tables. Isolating watchers to their own databases will prevent complications and conflicts between watcher db migrations. @@ -119,7 +119,7 @@ More detailed information on the APIs can be found [here](apis.md). 
## Resync A separate command `resync` is available for directing the resyncing of data within specified ranges. -This is useful if we want to re-validate a range of data using a new source or clean out bad/deprecated data. +This is useful if there is a need to re-validate a range of data using a new source or clean out bad/deprecated data. More detailed information on this command can be found [here](resync.md). ## IPFS Considerations @@ -129,8 +129,8 @@ directly with the backing Postgres database. Both these options circumvent the need to run a full IPFS daemon with a [go-ipld-eth](https://github.com/ipfs/go-ipld-eth) or [go-ipld-btc](https://github.com/ipld/go-ipld-btc) plugin. The former approach can lead to issues with lock-contention on the IPFS repo if another IPFS process is configured and running at the same $IPFS_PATH, it also necessitates the need for a locally configured IPFS repository. The later bypasses the need for a configured IPFS repository/$IPFS_PATH and allows all Postgres write operations at a given block height -to occur in a single transaction, the only disadvantage is that by avoiding moving through an IPFS node intermediary we lose the direct ability to reach out to the block -exchange for data we do not have locally. +to occur in a single transaction, the only disadvantage is that by avoiding moving through an IPFS node intermediary the direct ability to reach out to the block +exchange for data not found locally is lost. Once go-ipld-eth and go-ipld-btc have been updated to work with a modern version of PG-IPFS, an additional option will be provided to direct all publishing and fetching of IPLD objects through a remote IPFS daemon. 
\ No newline at end of file diff --git a/documentation/ipfs.md b/documentation/ipfs.md new file mode 100644 index 00000000..3997c1d1 --- /dev/null +++ b/documentation/ipfs.md @@ -0,0 +1,53 @@ +### PG-IPFS configuration + +This doc walks through the steps to install IPFS and configure it to use Postgres as its backing datastore. + +1. Start by downloading and moving into the IPFS repo: + +`go get github.com/ipfs/go-ipfs` + +`cd $GOPATH/src/github.com/ipfs/go-ipfs` + +2. Add the [Postgres-supporting fork](https://github.com/vulcanize/go-ipfs) and switch over to it: + +`git remote add vulcanize https://github.com/vulcanize/go-ipfs.git` + +`git fetch vulcanize` + +`git checkout -b postgres_update tags/v0.4.22-alpha` + +3. Now install this fork of ipfs; first be sure to remove any previous installation: + +`make install` + +4. Check that it is installed properly by running: + +`ipfs` + +You should see the CLI info/help output. + +5. Now we initialize with the `postgresds` profile. +If ipfs was previously initialized we will need to remove the old profile first. +We also need to provide env variables for the postgres connection: + +We can either set these manually, e.g. +```bash +export IPFS_PGHOST= +export IPFS_PGUSER= +export IPFS_PGDATABASE= +export IPFS_PGPORT= +export IPFS_PGPASSWORD= +``` + +And then run the ipfs command: + +`ipfs init --profile=postgresds` + +Or we can use the pre-made script at `$GOPATH/src/github.com/ipfs/go-ipfs/misc/utility/ipfs_postgres.sh` +which has usage: + +`./ipfs_postgres.sh <IPFS_PGHOST> <IPFS_PGPORT> <IPFS_PGUSER> <IPFS_PGDATABASE>` + +and will ask us to enter the password, avoiding storing it to an ENV variable. + +Once we have initialized ipfs, that is all we need to do with it- we do not need to run a daemon during the subsequent processes.
\ No newline at end of file diff --git a/documentation/resync.md b/documentation/resync.md index 8d4cb1d2..44e5332d 100644 --- a/documentation/resync.md +++ b/documentation/resync.md @@ -4,9 +4,9 @@ It also contains a utility for cleaning out old data, and resetting the validati ### Rational -Manual resyncing of data is useful when we want to re-validate data within specific ranges using a new source. +Manual resyncing of data can be used to re-validate data within specific ranges using a new source. -Cleaning out data is useful when we need to remove bad/deprecated data or prepare for breaking changes to the db schemas. +The option to clean out data may be needed to remove bad/deprecated data or to prepare for breaking changes to the db schemas. Resetting the validation level of data is useful for designating ranges of data for resyncing by an ongoing ipfs-blockchain-watcher backfill process. diff --git a/pkg/eth/mocks/test_data.go b/pkg/eth/mocks/test_data.go index fb5a32fe..e4cf42bb 100644 --- a/pkg/eth/mocks/test_data.go +++ b/pkg/eth/mocks/test_data.go @@ -47,7 +47,7 @@ var ( BlockNumber = big.NewInt(1) MockHeader = types.Header{ Time: 0, - Number: BlockNumber, + Number: new(big.Int).Set(BlockNumber), Root: common.HexToHash("0x0"), TxHash: common.HexToHash("0x0"), ReceiptHash: common.HexToHash("0x0"), @@ -298,7 +298,7 @@ var ( } MockStateDiff = statediff.StateObject{ - BlockNumber: BlockNumber, + BlockNumber: new(big.Int).Set(BlockNumber), BlockHash: MockBlock.Hash(), Nodes: StateDiffs, } @@ -414,7 +414,7 @@ var ( } MockCIDWrapper = &eth.CIDWrapper{ - BlockNumber: big.NewInt(1), + BlockNumber: new(big.Int).Set(BlockNumber), Header: eth.HeaderModel{ BlockNumber: "1", BlockHash: MockBlock.Hash().String(), @@ -459,7 +459,7 @@ var ( StorageIPLD, _ = blocks.NewBlockWithCid(StorageLeafNode, StorageCID) MockIPLDs = eth.IPLDs{ - BlockNumber: big.NewInt(1), + BlockNumber: new(big.Int).Set(BlockNumber), Header: ipfs.BlockModel{ Data: HeaderIPLD.RawData(), CID:
HeaderIPLD.Cid().String(), @@ -533,7 +533,7 @@ func createTransactionsAndReceipts() (types.Transactions, types.Receipts, common trx1 := types.NewTransaction(0, Address, big.NewInt(1000), 50, big.NewInt(100), []byte{}) trx2 := types.NewTransaction(1, AnotherAddress, big.NewInt(2000), 100, big.NewInt(200), []byte{}) trx3 := types.NewContractCreation(2, big.NewInt(1500), 75, big.NewInt(150), []byte{0, 1, 2, 3, 4, 5}) - transactionSigner := types.MakeSigner(params.MainnetChainConfig, BlockNumber) + transactionSigner := types.MakeSigner(params.MainnetChainConfig, new(big.Int).Set(BlockNumber)) mockCurve := elliptic.P256() mockPrvKey, err := ecdsa.GenerateKey(mockCurve, rand.Reader) if err != nil { diff --git a/pkg/shared/ipfs_mode.go b/pkg/shared/ipfs_mode.go index e6eb7f25..14d7d856 100644 --- a/pkg/shared/ipfs_mode.go +++ b/pkg/shared/ipfs_mode.go @@ -46,7 +46,7 @@ func (c IPFSMode) String() string { func NewIPFSMode(name string) (IPFSMode, error) { switch strings.ToLower(name) { - case "local", "interface", "minimal": + case "local", "interface": return LocalInterface, nil case "remote", "client": return RemoteClient, errors.New("remote IPFS client mode is not currently supported") diff --git a/scripts/reset_db b/scripts/reset_db index c7870271..f6fefe3b 100755 --- a/scripts/reset_db +++ b/scripts/reset_db @@ -11,9 +11,9 @@ fi db=$1 dir=$(basename "$(pwd)") -if [ $dir != "vulcanizedb" ] +if [ $dir != "ipfs-blockchain-watcher" ] then - echo "Run me from the vulcanizedb root dir" + echo "Run me from the ipfs-blockchain-watcher root dir" exit 1 fi diff --git a/test_config/test_config.go b/test_config/test_config.go index bb01502a..96143b04 100644 --- a/test_config/test_config.go +++ b/test_config/test_config.go @@ -34,7 +34,7 @@ func init() { func setTestConfig() { vip := viper.New() vip.SetConfigName("testing") - vip.AddConfigPath("$GOPATH/src/github.com/vulcanize/vulcanizedb/environments/") + 
vip.AddConfigPath("$GOPATH/src/github.com/vulcanize/ipfs-blockchain-watcher/environments/") if err := vip.ReadInConfig(); err != nil { logrus.Fatal(err) }