updates to documentation for super node

2020-04-17 16:59:25 -05:00 · 2020-04-17 16:59:25 -05:00 · 3764662fe3
commit 3764662fe3
parent 2d98d741b2
16 changed files with 601 additions and 295 deletions
--- a/cmd/streamEthSubscribe.go
+++ b/cmd/streamEthSubscribe.go
@ -168,7 +168,7 @@ func streamEthSubscription() {
 }
 func getRPCClient() core.RPCClient {
-	vulcPath := viper.GetString("superNode.ethSubscription.path")
+	vulcPath := viper.GetString("superNode.ethSubscription.wsPath")
 	if vulcPath == "" {
 		vulcPath = "ws://127.0.0.1:8080" // default to and try the default ws url if no path is provided
 	}
--- a/documentation/super_node/apis.md
+++ b/documentation/super_node/apis.md
@ -0,0 +1,262 @@
 ## VulcanizeDB Super Node APIs
 The super node exposes a number of different APIs for remote access to the underlying DB.
 ### Table of Contents
 1. [Postgraphile](#postgraphile)
 1. [RPC Subscription Interface](#rpc-subscription-interface)
 1. [Native API Recapitulation](#native-api-recapitulation)
 ### Postgraphile
 The super node processes all data into a Postgres DB- using PG-IPFS this includes all of the IPLD objects.
 [Postgraphile](https://www.graphile.org/postgraphile/) can be used to expose GraphQL endpoints for the Postgres tables.
 e.g. 
 `postgraphile --plugins @graphile/pg-pubsub --subscriptions --simple-subscriptions -c postgres://localhost:5432/vulcanize_public?sslmode=disable -s public,btc,eth -a -j`
 This will stand up a Postgraphile server on the public, eth, and btc schemas- exposing GraphQL endpoints for all of the tables contained under those schemas.
 All of their data can then be queried with standard [GraphQL](https://graphql.org) queries.
 ### RPC Subscription Interface
 A direct, real-time subscription to the data being processed by the super node can be established over WS or IPC through the [Stream](../../pkg/super_node/api.go#L53) RPC method.
 This method is not chain-specific and each chain-type supports it; it is accessed under the "vdb" namespace rather than a chain-specific namespace. An interface for
 subscribing to this endpoint is provided [here](../../libraries/shared/streamer/super_node_streamer.go).
 When subscribing to this endpoint, the subscriber provides a set of RLP-encoded subscription parameters. These parameters will be chain-specific, and are used
 by the super node to filter and return a requested subset of chain data to the subscriber. (e.g. [BTC](../../pkg/super_node/btc/subscription_config.go), [ETH](../../pkg/super_node/eth/subscription_config.go)).
 #### Ethereum RPC Subscription
 An example of how to subscribe to a real-time Ethereum data feed from the super node using the `Stream` RPC method is provided below
 ```go
    package main 
    import (
    	"github.com/ethereum/go-ethereum/rlp"
    	"github.com/ethereum/go-ethereum/rpc"
    	"github.com/spf13/viper"
    	"github.com/vulcanize/vulcanizedb/libraries/shared/streamer"
    	"github.com/vulcanize/vulcanizedb/pkg/eth/client"
    	"github.com/vulcanize/vulcanizedb/pkg/super_node"
    	"github.com/vulcanize/vulcanizedb/pkg/super_node/eth"
    )
    config, _ := eth.NewEthSubscriptionConfig()
    rlpConfig, _ := rlp.EncodeToBytes(config)
    vulcPath := viper.GetString("superNode.ethSubscription.wsPath")
    rawRPCClient, _ := rpc.Dial(vulcPath)
    rpcClient := client.NewRPCClient(rawRPCClient, vulcPath)
    stream := streamer.NewSuperNodeStreamer(rpcClient)
    payloadChan := make(chan super_node.SubscriptionPayload, 20000)
    subscription, _ := stream.Stream(payloadChan, rlpConfig)
    for {
        select {
        case payload := <- payloadChan:
            // do something with the subscription payload
        case err := <- subscription.Err():
            // do something with the subscription error
        }
    }
 ```
 The .toml file being used to fill the Ethereum subscription config would look something like this:
 ```toml
 [superNode]
    [superNode.ethSubscription]
        historicalData = false
        historicalDataOnly = false
        startingBlock = 0
        endingBlock = 0
        wsPath = "ws://127.0.0.1:8080"
        [superNode.ethSubscription.headerFilter]
            off = false
            uncles = false
        [superNode.ethSubscription.txFilter]
            off = false
            src = []
            dst = []
        [superNode.ethSubscription.receiptFilter]
            off = false
            contracts = []
            topic0s = []
            topic1s = []
            topic2s = []
            topic3s = []
        [superNode.ethSubscription.stateFilter]
            off = false
            addresses = []
           intermediateNodes = false
        [superNode.ethSubscription.storageFilter]
            off = true
            addresses = []
            storageKeys = []
            intermediateNodes = false
 ```
 These configuration parameters are broken down as follows:
 `ethSubscription.wsPath` is used to define the SuperNode ws url OR ipc endpoint we subscribe to
 `ethSubscription.historicalData` specifies whether or not the super node should look up historical data in its cache and
 send that to the subscriber, if this is set to `false` then the super node only streams newly synced/incoming data
 `ethSubscription.historicalDataOnly` will tell the super node to only send historical data within the specified range and
 not stream forward syncing data
 `ethSubscription.startingBlock` is the starting block number for the range we want to receive data in
 `ethSubscription.endingBlock` is the ending block number for the range we want to receive data in;
 setting to 0 means there is no end/we will continue streaming indefinitely.
 `ethSubscription.headerFilter` has two sub-options: `off` and `uncles`. 
 - Setting `off` to true tells the super node to not send any headers to the subscriber
 - setting `uncles` to true tells the super node to send uncles in addition to normal headers.
 `ethSubscription.txFilter` has three sub-options: `off`, `src`, and `dst`. 
 - Setting `off` to true tells the super node to not send any transactions to the subscriber
 - `src` and `dst` are string arrays which can be filled with ETH addresses we want to filter transactions for,
 if they have any addresses then the super node will only send transactions that were sent or received by the addresses contained
 in `src` and `dst`, respectively.
 `ethSubscription.receiptFilter` has four sub-options: `off`, `topics`, `contracts` and `matchTxs`. 
 - Setting `off` to true tells the super node to not send any receipts to the subscriber
 - `topic0s` is a string array which can be filled with event topics we want to filter for,
 if it has any topics then the super node will only send receipts that contain logs which have that topic0.
 - `contracts` is a string array which can be filled with contract addresses we want to filter for, if it contains any contract addresses the super node will
 only send receipts that correspond to one of those contracts. 
 - `matchTxs` is a bool; when set to true, any receipts that correspond to filtered-for transactions will be sent by the super node, regardless of whether or not the receipt satisfies the `topics` or `contracts` filters.
 `ethSubscription.stateFilter` has three sub-options: `off`, `addresses`, and `intermediateNodes`. 
 - Setting `off` to true tells the super node to not send any state data to the subscriber
 - `addresses` is a string array which can be filled with ETH addresses we want to filter state for,
 if it has any addresses then the super node will only send state leafs (accounts) corresponding to those account addresses. 
 - By default the super node only sends along state leafs, if we want to receive branch and extension nodes as well `intermediateNodes` can be set to `true`.
 `ethSubscription.storageFilter` has four sub-options: `off`, `addresses`, `storageKeys`, and `intermediateNodes`. 
 - Setting `off` to true tells the super node to not send any storage data to the subscriber
 - `addresses` is a string array which can be filled with ETH addresses we want to filter storage for,
 if it has any addresses then the super node will only send storage nodes from the storage tries at those state addresses. 
 - `storageKeys` is another string array that can be filled with storage keys we want to filter storage data for. It is important to note that the storage keys need to be the actual keccak256 hashes, whereas
 the addresses in the `addresses` fields are the plain (un-hashed) ETH addresses, not their keccak256 hashes.
 - By default the super node only sends along storage leafs, if we want to receive branch and extension nodes as well `intermediateNodes` can be set to `true`.
 #### Bitcoin RPC Subscription
 An example of how to subscribe to a real-time Bitcoin data feed from the super node using the `Stream` RPC method is provided below
 ```go
    package main 
    import (
    	"github.com/ethereum/go-ethereum/rlp"
    	"github.com/ethereum/go-ethereum/rpc"
    	"github.com/spf13/viper"
    	"github.com/vulcanize/vulcanizedb/libraries/shared/streamer"
    	"github.com/vulcanize/vulcanizedb/pkg/eth/client"
    	"github.com/vulcanize/vulcanizedb/pkg/super_node"
    	"github.com/vulcanize/vulcanizedb/pkg/super_node/btc"
    )
    config, _ := btc.NewBtcSubscriptionConfig()
    rlpConfig, _ := rlp.EncodeToBytes(config)
    vulcPath := viper.GetString("superNode.btcSubscription.wsPath")
    rawRPCClient, _ := rpc.Dial(vulcPath)
    rpcClient := client.NewRPCClient(rawRPCClient, vulcPath)
    stream := streamer.NewSuperNodeStreamer(rpcClient)
    payloadChan := make(chan super_node.SubscriptionPayload, 20000)
    subscription, _ := stream.Stream(payloadChan, rlpConfig)
    for {
        select {
        case payload := <- payloadChan:
            // do something with the subscription payload
        case err := <- subscription.Err():
            // do something with the subscription error
        }
    }
 ```
 The .toml file being used to fill the Bitcoin subscription config would look something like this:
 ```toml
 [superNode]
    [superNode.btcSubscription]
        historicalData = false
        historicalDataOnly = false
        startingBlock = 0
        endingBlock = 0
        wsPath = "ws://127.0.0.1:8080"
        [superNode.btcSubscription.headerFilter]
            off = false
        [superNode.btcSubscription.txFilter]
            off = false
            segwit = false
            witnessHashes = []
            indexes = []
            pkScriptClass = []
            multiSig = false
            addresses = []
 ```
 These configuration parameters are broken down as follows:
 `btcSubscription.wsPath` is used to define the SuperNode ws url OR ipc endpoint we subscribe to
 `btcSubscription.historicalData` specifies whether or not the super node should look up historical data in its cache and
 send that to the subscriber, if this is set to `false` then the super node only streams newly synced/incoming data
 `btcSubscription.historicalDataOnly` will tell the super node to only send historical data within the specified range and
 not stream forward syncing data
 `btcSubscription.startingBlock` is the starting block number for the range we want to receive data in
 `btcSubscription.endingBlock` is the ending block number for the range we want to receive data in;
 setting to 0 means there is no end/we will continue streaming indefinitely.
 `btcSubscription.headerFilter` has one sub-option: `off`. 
 - Setting `off` to true tells the super node to
 not send any headers to the subscriber. 
 - Additional header-filtering options will be added in the future.
 `btcSubscription.txFilter` has seven sub-options: `off`, `segwit`, `witnessHashes`, `indexes`, `pkScriptClass`, `multiSig`, and `addresses`.
 - Setting `off` to true tells the super node to not send any transactions to the subscriber. 
 - Setting `segwit` to true tells the super node to only send segwit transactions. 
 - `witnessHashes` is a string array that can be filled with witness hash strings; if it contains any hashes the super node will only send transactions that contain one of those hashes.
 - `indexes` is an int64 array that can be filled with tx index numbers; if it contains any integers the super node will only send transactions at those indexes (e.g. `[0]` will send only coinbase transactions)
 - `pkScriptClass` is a uint8 array that can be filled with pk script class numbers; if it contains any integers the super node will only send transactions that have at least one tx output with one of the specified pkscript classes;
 possible class types are 0 through 8 as defined [here](https://github.com/btcsuite/btcd/blob/master/txscript/standard.go#L52).
 - Setting `multiSig` to true tells the super node to send only multi-sig transactions, i.e. only transactions that have at least one tx output that requires more than one signature to spend.
 - `addresses` is a string array that can be filled with btc address strings; if it contains any addresses the super node will only send transactions that have at least one tx output with at least one of the provided addresses.
 ### Native API Recapitulation
 In addition to providing novel Postgraphile and RPC-Subscription endpoints, we are working towards complete recapitulation of the
 standard chain APIs. This will allow direct compatibility with software that already makes use of the standard interfaces.
 #### Ethereum JSON-RPC API
 The super node currently faithfully recapitulates portions of the Ethereum JSON-RPC api standard.  
 The currently supported endpoints include:  
 `eth_blockNumber`  
 `eth_getLogs`  
 `eth_getHeaderByNumber`  
 `eth_getBlockByNumber`  
 `eth_getBlockByHash`  
 `eth_getTransactionByHash`  
 Additional endpoints will be added in the near future, with the immediate goal of recapitulating the entire "eth_" set of endpoints.
 #### Bitcoin JSON-RPC API
 In the near future, the standard Bitcoin JSON-RPC interfaces will be implemented.
--- a/documentation/super_node/architecture.md
+++ b/documentation/super_node/architecture.md
@ -1,16 +1,134 @@
-These are the components of a VulcanizeDB Watcher:
+# VulcanizeDB Super Node Architecture
-* Data Fetcher/Streamer sources:
+The VulcanizeDB super node is a collection of interfaces that are used to extract, process, and store in Postgres-IPFS
-  * go-ethereum
+all chain data. The raw data indexed by the super node serves as the basis for more specific watchers and applications.
-  * bitcoind
+
-  * btcd
+Currently the service supports complete processing of all Bitcoin and Ethereum data.
-  * IPFS
+
-* Transformers contain:
+## Table of Contents
-  * converter
+1. [Processes](#processes)
-  * publisher
+1. [Command](#command)
-  * indexer
+1. [Configuration](#config)
-* Endpoints contain:
+1. [Database](#database)
-  * api
+1. [APIs](#apis)
-  * backend
+1. [Resync](#resync)
-  * filterer
+1. [IPFS Considerations](#ipfs-considerations)
-  * retriever
+
-    * ipld_server
+## Processes
 The [super node service](../../pkg/super_node/service.go#L61) is comprised of the following interfaces:
 * [Payload Fetcher](../../pkg/super_node/shared/interfaces.go#L29): Fetches raw chain data from a half-duplex endpoint (HTTP/IPC), used for historical data fetching. ([BTC](../../pkg/super_node/btc/payload_fetcher.go), [ETH](../../pkg/super_node/eth/payload_fetcher.go)).
 * [Payload Streamer](../../pkg/super_node/shared/interfaces.go#L24): Streams raw chain data from a full-duplex endpoint (WebSocket/IPC), used for syncing data at the head of the chain in real-time. ([BTC](../../pkg/super_node/btc/http_streamer.go), [ETH](../../pkg/super_node/eth/streamer.go)).
 * [Payload Converter](../../pkg/super_node/shared/interfaces.go#L34): Converts raw chain data to an intermediary form prepared for IPFS publishing. ([BTC](../../pkg/super_node/btc/converter.go), [ETH](../../pkg/super_node/eth/converter.go)).
 * [IPLD Publisher](../../pkg/super_node/shared/interfaces.go#L39): Publishes the converted data to IPFS, returning their CIDs and associated metadata for indexing. ([BTC](../../pkg/super_node/btc/publisher.go), [ETH](../../pkg/super_node/eth/publisher.go)).
 * [CID Indexer](../../pkg/super_node/shared/interfaces.go#L44): Indexes CIDs in Postgres with their associated metadata. This metadata is chain specific and selected based on utility. ([BTC](../../pkg/super_node/btc/indexer.go), [ETH](../../pkg/super_node/eth/indexer.go)).
 * [CID Retriever](../../pkg/super_node/shared/interfaces.go#L54): Retrieves CIDs from Postgres by searching against their associated metadata, is used to lookup data to serve API requests/subscriptions. ([BTC](../../pkg/super_node/btc/retriever.go), [ETH](../../pkg/super_node/eth/retriever.go)).
 * [IPLD Fetcher](../../pkg/super_node/shared/interfaces.go#L62): Fetches the IPLDs needed to service API requests/subscriptions from IPFS using retrieved CIDs; can route through an IPFS block-exchange to search for objects that are not directly available. ([BTC](../../pkg/super_node/btc/ipld_fetcher.go), [ETH](../../pkg/super_node/eth/ipld_fetcher.go)).
 * [Response Filterer](../../pkg/super_node/shared/interfaces.go#L49): Filters converted data payloads served to API subscriptions; filters according to the subscriber provided parameters. ([BTC](../../pkg/super_node/btc/filterer.go), [ETH](../../pkg/super_node/eth/filterer.go)).
 * [DB Cleaner](../../pkg/super_node/shared/interfaces.go#L73): Used to clean out cached IPFS objects, CIDs, and associated metadata. Useful for removing bad data or to introduce incompatible changes to the db schema/tables. ([BTC](../../pkg/super_node/btc/cleaner.go), [ETH](../../pkg/super_node/eth/cleaner.go)).
 * [API](https://github.com/ethereum/go-ethereum/blob/master/rpc/types.go#L31): Expose RPC methods for clients to interface with the data. Chain-specific APIs should aim to recapitulate as much of the native API as possible. ([VDB](../../pkg/super_node/api.go), [ETH](../../pkg/super_node/eth/api.go)).
 Appropriating the service for a new chain is done by creating underlying types to satisfy these interfaces for
 the specifics of that chain.
 The service uses these interfaces to operate in any combination of three modes: sync, serve, and backfill.
 * Sync: Streams raw chain data at the head, converts and publishes it to IPFS, and indexes the resulting set of CIDs in Postgres with useful metadata.
 * BackFill: Automatically searches for and detects gaps in the DB; fetches, converts, publishes, and indexes the data to fill these gaps.
 * Serve: Opens up IPC, HTTP, and WebSocket servers on top of the superNode DB and any concurrent sync and/or backfill processes.
 These three modes are all operated through a single vulcanizeDB command: `superNode`
 ## Command
 Usage: `./vulcanizedb superNode --config={config.toml}`
 Configuration can also be done through CLI options and/or environmental variables.
 CLI options can be found using `./vulcanizedb superNode --help`.
 ## Config
 Below is the set of universal config parameters for the superNode command, in .toml form, with the respective environmental variables commented to the side.
 This set of parameters needs to be set no matter the chain type.
 ```toml
 [database]
    name     = "vulcanize_public" # $DATABASE_NAME
    hostname = "localhost" # $DATABASE_HOSTNAME
    port     = 5432 # $DATABASE_PORT
    user     = "vdbm" # $DATABASE_USER
    password = "" # $DATABASE_PASSWORD
 [ipfs]
    path = "~/.ipfs" # $IPFS_PATH
 [superNode]
    chain = "bitcoin" # $SUPERNODE_CHAIN
    server = true # $SUPERNODE_SERVER
    ipcPath = "~/.vulcanize/vulcanize.ipc" # $SUPERNODE_IPC_PATH
    wsPath = "127.0.0.1:8082" # $SUPERNODE_WS_PATH
    httpPath = "127.0.0.1:8083" # $SUPERNODE_HTTP_PATH
    sync = true # $SUPERNODE_SYNC
    workers = 1 # $SUPERNODE_WORKERS
    backFill = true # $SUPERNODE_BACKFILL
    frequency = 45 # $SUPERNODE_FREQUENCY
    batchSize = 1 # $SUPERNODE_BATCH_SIZE
    batchNumber = 50 # $SUPERNODE_BATCH_NUMBER
    validationLevel = 1 # $SUPERNODE_VALIDATION_LEVEL
 ```
 Additional parameters need to be set depending on the specific chain.
 For Bitcoin:
 ```toml
 [bitcoin]
    wsPath  = "127.0.0.1:8332" # $BTC_WS_PATH
    httpPath = "127.0.0.1:8332" # $BTC_HTTP_PATH
    pass = "password" # $BTC_NODE_PASSWORD
    user = "username" # $BTC_NODE_USER
    nodeID = "ocd0" # $BTC_NODE_ID
    clientName = "Omnicore" # $BTC_CLIENT_NAME
    genesisBlock = "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" # $BTC_GENESIS_BLOCK
    networkID = "0xD9B4BEF9" # $BTC_NETWORK_ID
 ```
 For Ethereum:
 ```toml
 [ethereum]
    wsPath  = "127.0.0.1:8546" # $ETH_WS_PATH
    httpPath = "127.0.0.1:8545" # $ETH_HTTP_PATH
 ```
 ## Database
 Currently, the super node persists all data to a single Postgres database. The migrations for this DB can be found [here](../../db/migrations).
 Chain-specific data is populated under a chain-specific schema (e.g. `eth` and `btc`) while shared data- such as the IPFS blocks table- is populated under the `public` schema.
 Subsequent watchers which act on the raw chain data should build and populate their own schemas or separate databases entirely.
 In the future, we will be moving to a foreign table based architecture wherein a single db is used for shared data while each watcher uses
 its own database and accesses and acts on the shared data through foreign tables. Isolating watchers to their own databases will prevent complications and
 conflicts between watcher db migrations.
 ## APIs
 The super node provides multiple types of APIs by which to interface with its data.
 More detailed information on the APIs can be found [here](apis.md).
 ## Resync
 A separate command `resync` is available for directing the resyncing of data within specified ranges.
 This is useful if we want to re-validate a range of data using a new source or clean out bad/deprecated data.
 More detailed information on this command can be found [here](resync.md).
 ## IPFS Considerations
 Currently, the IPLD Publisher and Fetcher use internalized IPFS processes which interface directly with a local IPFS repository.
 This circumvents the need to run a full IPFS daemon with a [go-ipld-eth](https://github.com/ipfs/go-ipld-eth) plugin, but can lead to issues
 with lock-contention on the IPFS repo if another IPFS process is configured and running at the same $IPFS_PATH. This also necessitates
 a locally configured IPFS repository.
 Once go-ipld-eth has been updated to work with a modern version of PG-IPFS, an additional option will be provided to direct
 all publishing and fetching of IPLD objects through a remote IPFS daemon.
--- a/documentation/super_node/resync.md
+++ b/documentation/super_node/resync.md
@ -0,0 +1,68 @@
 ## VulcanizeDB Super Node Resync
 The `resync` command is made available for directing the resyncing of super node data within specified ranges.
 It also contains a utility for cleaning out old data, and resetting the validation level of data.
 ### Rationale
 Manual resyncing of data is useful when we want to re-validate data within specific ranges using a new source.
 Cleaning out data is useful when we need to remove bad/deprecated data or prepare for breaking changes to the db schemas.
 Resetting the validation level of data is useful for designating ranges of data for resyncing by an ongoing super node
 backfill process.
 ### Command
 Usage: `./vulcanizedb resync --config={config.toml}`
 Configuration can also be done through CLI options and/or environmental variables.
 CLI options can be found using `./vulcanizedb resync --help`.
 ### Config
 Below is the set of universal config parameters for the resync command, in .toml form, with the respective environmental variables commented to the side.
 This set of parameters needs to be set no matter the chain type.
 ```toml
 [database]
    name     = "vulcanize_public" # $DATABASE_NAME
    hostname = "localhost" # $DATABASE_HOSTNAME
    port     = 5432 # $DATABASE_PORT
    user     = "vdbm" # $DATABASE_USER
    password = "" # $DATABASE_PASSWORD
 [ipfs]
    path = "~/.ipfs" # $IPFS_PATH
 [resync]
    chain = "ethereum" # $RESYNC_CHAIN
    type = "state" # $RESYNC_TYPE
    start = 0 # $RESYNC_START
    stop = 1000 # $RESYNC_STOP
    batchSize = 10 # $RESYNC_BATCH_SIZE
    batchNumber = 100 # $RESYNC_BATCH_NUMBER
    clearOldCache = true # $RESYNC_CLEAR_OLD_CACHE
    resetValidation = true # $RESYNC_RESET_VALIDATION
 ```
 Additional parameters need to be set depending on the specific chain.
 For Bitcoin: 
 ```toml
 [bitcoin]
    httpPath = "127.0.0.1:8332" # $BTC_HTTP_PATH
    pass = "password" # $BTC_NODE_PASSWORD
    user = "username" # $BTC_NODE_USER
    nodeID = "ocd0" # $BTC_NODE_ID
    clientName = "Omnicore" # $BTC_CLIENT_NAME
    genesisBlock = "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" # $BTC_GENESIS_BLOCK
    networkID = "0xD9B4BEF9" # $BTC_NETWORK_ID
 ```
 For Ethereum:
 ```toml
 [ethereum]
    httpPath = "127.0.0.1:8545" # $ETH_HTTP_PATH
 ```
--- a/documentation/super_node/setup.md
+++ b/documentation/super_node/setup.md
@ -1,14 +1,35 @@
-## Super Node Setup
+# VulcanizeDB Super Node Setup
 Step-by-step instructions for manually setting up and running a VulcanizeDB super node.
-Vulcanizedb can act as an index for chain data stored on IPFS through the use of the `superNode` command. 
+Steps:
 1. [Postgres](#postgres)
 1. [Goose](#goose)
 1. [IPFS](#ipfs)
 1. [Blockchain](#blockchain)
 1. [VulcanizeDB](#vulcanizedb)
-### Manual Setup
+### Postgres
 A Postgres DB is needed to store all of the data in the vulcanizedb system.
 Postgres is used as the backing datastore for IPFS, and is used to index the CIDs for all of the chain data stored on IPFS.
 Follow the guides [here](https://wiki.postgresql.org/wiki/Detailed_installation_guides) for setting up Postgres.
-These commands work in conjunction with a [state-diffing full Geth node](https://github.com/vulcanize/go-ethereum/tree/statediffing)
+Once the Postgres server is running, we will need to make a database for vulcanizedb, e.g. `vulcanize_public`.
 and IPFS.
-#### IPFS
+`createdb vulcanize_public`
-To start, download and install [IPFS](https://github.com/vulcanize/go-ipfs)
+
 For running the automated tests, also create a database named `vulcanize_testing`.
 `createdb vulcanize_testing`
 ### Goose
 We use [goose](https://github.com/pressly/goose) as our migration management tool. While it is not necessary to use `goose` for manual setup, it
 is required for running the automated tests.
 ### IPFS
 We use IPFS to store IPLD objects for each type of data we extract from the chain.
 To start, download and install [IPFS](https://github.com/vulcanize/go-ipfs):
 `go get github.com/ipfs/go-ipfs`
@ -26,11 +47,11 @@ Start by adding the fork and switching over to it:
 `git checkout -b postgres_update vulcanize/postgres_update`
-Now install this fork of ipfs, first be sure to remove any previous installation.
+Now install this fork of ipfs, first be sure to remove any previous installation:
 `make install`
-Check that is installed properly by running
+Check that it is installed properly by running:
 `ipfs`
@ -49,7 +70,7 @@ export IPFS_PGPORT=
 export IPFS_PGPASSWORD=
 ```
-And then run the ipfs command
+And then run the ipfs command:
 `ipfs init --profile=postgresds`
@ -62,10 +83,14 @@ and will ask us to enter the password, avoiding storing it to an ENV variable.
 Once we have initialized ipfs, that is all we need to do with it- we do not need to run a daemon during the subsequent processes (in fact, we can't).
-#### Geth 
+### Blockchain
-For Geth, we currently *require* a special fork, and we can set this up as follows:
+This section describes how to set up an Ethereum or Bitcoin node to serve as a data source for the super node.
-Begin by downloading geth and switching to the vulcanize/rpc_statediffing branch
+#### Ethereum
 For Ethereum, we currently *require* [a special fork of go-ethereum](https://github.com/vulcanize/go-ethereum/tree/statediff_at_anyblock-1.9.11). This can be set up as follows.
 Skip these steps if you already have access to a node that displays the statediffing endpoints.
 Begin by downloading geth and switching to the vulcanize/statediff_at_anyblock-1.9.11 branch:
 `go get github.com/ethereum/go-ethereum`
@ -75,9 +100,9 @@ Begin by downloading geth and switching to the vulcanize/rpc_statediffing branch
 `git fetch vulcanize`
-`git checkout -b statediffing vulcanize/statediff_at_anyblock-1.9.9`
+`git checkout -b statediffing vulcanize/statediff_at_anyblock-1.9.11`
-Now, install this fork of geth (make sure any old versions have been uninstalled/binaries removed first)
+Now, install this fork of geth (make sure any old versions have been uninstalled/binaries removed first):
 `make geth`
@ -87,163 +112,49 @@ And run the output binary with statediffing turned on:
 `./geth --statediff --statediff.streamblock --ws --syncmode=full`
 Note: if you wish to access historical data (perform `backFill`) then the node will need to operate as an archival node (`--gcmode=archive`)
 Note: other CLI options- statediff specific ones included- can be explored with `./geth help`
 The output from geth should mention that it is `Starting statediff service` and block synchronization should begin shortly thereafter.
-Note that until it receives a subscriber, the statediffing process does essentially nothing. Once a subscription is received, this 
+Note that until it receives a subscriber, the statediffing process does nothing but wait for one. Once a subscription is received, this
-will be indicated in the output. 
+will be indicated in the output and the node will begin processing and sending statediffs.
-Also in the output will be the websocket url and ipc paths that we will use to subscribe to the statediffing process.
+Also in the output will be the endpoints that we will use to interface with the node.
-The default ws url is "ws://127.0.0.1:8546" and the default ipcPath- on Darwin systems only- is "Users/user/Library/Ethereum/geth.ipc"
+The default ws url is "127.0.0.1:8546" and the default http url is "127.0.0.1:8545".
 These values will be used as the `ethereum.wsPath` and `ethereum.httpPath` in the super node config, respectively.
-#### Vulcanizedb
+#### Bitcoin
 For Bitcoin, the super node is able to operate entirely through the universally exposed JSON-RPC interfaces.
 This means we can use any of the standard full nodes (e.g. bitcoind, btcd) as our data source.
-The `superNode` command is used to initialize and run an instance of the VulcanizeDB SuperNode
+Point at a remote node or set one up locally using the instructions for [bitcoind](https://github.com/bitcoin/bitcoin) and [btcd](https://github.com/btcsuite/btcd).
-Usage:
+The default http url is "127.0.0.1:8332". We will use the http endpoint as both the `bitcoin.wsPath` and `bitcoin.httpPath`
 (bitcoind does not support websocket endpoints, we are currently using a "subscription" wrapper around the http endpoints)
 ### Vulcanizedb
 Finally, we can begin the vulcanizeDB process itself.
 Start by downloading vulcanizedb and moving into the repo:
 `go get github.com/vulcanize/vulcanizedb`
 `cd $GOPATH/src/github.com/vulcanize/vulcanizedb`
 Run the db migrations against the Postgres database we created for vulcanizeDB:
 `goose -dir=./db/migrations postgres postgres://localhost:5432/vulcanize_public?sslmode=disable up`
 At this point, if we want to run the automated tests:
 `make test`
 `make integration_test`
 Then, build the vulcanizedb binary:
 `go build`
 And run the super node command with a provided [config](architecture.md/#):
 `./vulcanizedb superNode --config=<config_file.toml>`
 The config file contains the parameters needed to initialize a super node with the appropriate chain(s), settings, and services
 The below example spins up a super node for btc and eth
 ```toml
 [superNode]
    chains = ["ethereum", "bitcoin"]
    ipfsPath = "/Users/iannorden/.ipfs"
    [superNode.ethereum.database]
        name     = "vulcanize_demo"
        hostname = "localhost"
        port     = 5432
        user     = "postgres"
    [superNode.ethereum.sync]
        on = true
        wsPath  = "ws://127.0.0.1:8546"
        workers = 1
    [superNode.ethereum.server]
        on = true
        ipcPath = "/Users/iannorden/.vulcanize/eth/vulcanize.ipc"
        wsPath = "127.0.0.1:8080"
        httpPath = "127.0.0.1:8081"
    [superNode.ethereum.backFill]
        on = true
        httpPath = "http://127.0.0.1:8545"
        frequency = 15
        batchSize = 50
    [superNode.bitcoin.database]
         name     = "vulcanize_demo"
         hostname = "localhost"
         port     = 5432
         user     = "postgres"
    [superNode.bitcoin.sync]
         on = true
         wsPath  = "127.0.0.1:8332"
         workers = 1
         pass = "GhhOhxL6GxteDhgzrTqj"
         user = "ocdrpc"
    [superNode.bitcoin.server]
         on = true
         ipcPath = "/Users/iannorden/.vulcanize/btc/vulcanize.ipc"
         wsPath = "127.0.0.1:8082"
         httpPath = "127.0.0.1:8083"
    [superNode.bitcoin.backFill]
         on = true
         httpPath = "127.0.0.1:8332"
         frequency = 15
         batchSize = 50
         pass = "GhhOhxL6GxteDhgzrTqj"
         user = "ocdrpc"
    [superNode.bitcoin.node]
         nodeID = "ocd0"
         clientName = "Omnicore"
         genesisBlock = "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f"
         networkID = "0xD9B4BEF9"
 ```
 ### Dockerfile Setup
 The below provides step-by-step directions for how to set up the super node using the provided Dockerfile on an AWS Linux AMI instance.
 Note that the instance will need sufficient memory and storage for this to work.
 1. Install basic dependencies 
 ```
 sudo yum update
 sudo yum install -y curl gpg gcc gcc-c++ make git
 ```
 2. Install Go 1.12
 ```
 wget https://dl.google.com/go/go1.12.6.linux-amd64.tar.gz
 tar -xzf go1.12.6.linux-amd64.tar.gz
 sudo mv go /usr/local
 ```
 3. Edit .bash_profile to export GOPATH
 ```
 export GOROOT=/usr/local/go
 export GOPATH=$HOME/go
 export PATH=$GOPATH/bin:$GOROOT/bin:$PATH
 ```
 4. Install and setup Postgres
 ```
 sudo yum install postgresql postgresql96-server
 sudo service postgresql96 initdb
 sudo service postgresql96 start
 sudo -u postgres createuser -s ec2-user
 sudo -u postgres createdb ec2-user
 sudo su postgres
 psql
 ALTER USER "ec2-user" WITH SUPERUSER;
 \q
 exit
 ```
 4b. Edit hba_file to trust local connections
 ```
 psql
 SHOW hba_file;
 \q
 sudo vim {PATH_TO_FILE}
 ```
 4c. Stop and restart Postgres server for the changes to take effect
 ```
 sudo service postgresql96 stop
 sudo service postgresql96 start
 ```
 5. Install and start Docker (exit and re-enter ec2 instance afterwards for the changes to take effect)
 ```
 sudo yum install -y docker
 sudo service  docker start
 sudo usermod -aG docker ec2-user
 ```
 6. Fetch the repository
 ```
 go get github.com/vulcanize/vulcanizedb
 cd $GOPATH/src/github.com/vulcanize/vulcanizedb
 ```
 7. Create the db
 ```
 createdb vulcanize_public
 ```
 8. Build and run the Docker image
 ```
 cd $GOPATH/src/github.com/vulcanize/vulcanizedb/dockerfiles/super_node
 docker build --build-arg CONFIG_FILE=environments/superNode.toml --build-arg EXPOSE_PORT_1=8080 --build-arg EXPOSE_PORT_2=8081 --build-arg EXPOSE_PORT_3=8082 --build-arg EXPOSE_PORT_4=8083 .
 docker run --network host -e IPFS_INIT=true -e VDB_PG_NAME=vulcanize_public -e VDB_PG_HOSTNAME=localhost -e VDB_PG_PORT=5432 -e VDB_PG_USER=postgres -e VDB_PG_PASSWORD=password {IMAGE_ID}
 ```
--- a/documentation/super_node/subscription.md
+++ b/documentation/super_node/subscription.md
@ -1,88 +0,0 @@
 ## SuperNode Subscription
 A transformer can subscribe to the SuperNode service over its ipc or ws endpoints, when subscribing the transformer
 specifies the chain and a set of parameters which define which subsets of that chain's data the server should feed to them.
 ### Ethereum data
 The `streamEthSubscribe` command serves as a simple demonstration/example of subscribing to the super-node Ethereum feed, it subscribes with a set of parameters
 defined in the loaded config file, and prints the streamed data to stdout. To build transformers that subscribe to and use super-node Ethereum data,
 the shared/libraries/streamer can be used. 
 Usage: 
 `./vulcanizedb streamEthSubscribe --config=<config_file.toml>`
 The config for `streamEthSubscribe` has a set of parameters to fill the [EthSubscription config structure](../../pkg/super_node/config/eth_subscription.go)
 ```toml
 [superNode]
    [superNode.ethSubscription]
        historicalData = false
        historicalDataOnly = false
        startingBlock = 0
        endingBlock = 0
        wsPath = "ws://127.0.0.1:8080"
        [superNode.ethSubscription.headerFilter]
            off = false
            uncles = false
        [superNode.ethSubscription.txFilter]
            off = false
            src = []
            dst = []
        [superNode.ethSubscription.receiptFilter]
            off = false
            contracts = []
            topic0s = []
            topic1s = []
            topic2s = []
            topic3s = []
        [superNode.ethSubscription.stateFilter]
            off = false
            addresses = []
           intermediateNodes = false
        [superNode.ethSubscription.storageFilter]
            off = true
            addresses = []
            storageKeys = []
            intermediateNodes = false
 ```
 `ethSubscription.wsPath` is used to define the SuperNode ws url OR ipc endpoint we subscribe to
 `ethSubscription.historicalData` specifies whether or not the super-node should look up historical data in its cache and
 send that to the subscriber, if this is set to `false` then the super-node only streams newly synced/incoming data
 `ethSubscription.historicalDataOnly` will tell the super-node to only send historical data with the specified range and
 not stream forward syncing data
 `ethSubscription.startingBlock` is the starting block number for the range we want to receive data in
 `ethSubscription.endingBlock` is the ending block number for the range we want to receive data in;
 setting to 0 means there is no end/we will continue streaming indefinitely.
 `ethSubscription.headerFilter` has two sub-options: `off` and `uncles`. Setting `off` to true tells the super-node to
 not send any headers to the subscriber; setting `uncles` to true tells the super-node to send uncles in addition to normal headers.
 `ethSubscription.txFilter` has three sub-options: `off`, `src`, and `dst`. Setting `off` to true tells the super-node to
 not send any transactions to the subscriber; `src` and `dst` are string arrays which can be filled with ETH addresses we want to filter transactions for,
 if they have any addresses then the super-node will only send transactions that were sent or received by the addresses contained
 in `src` and `dst`, respectively.
 `ethSubscription.receiptFilter` has four sub-options: `off`, `topics`, `contracts` and `matchTxs`. Setting `off` to true tells the super-node to
 not send any receipts to the subscriber; `topic0s` is a string array which can be filled with event topics we want to filter for,
 if it has any topics then the super-node will only send receipts that contain logs which have that topic0. Similarly, `contracts` is
 a string array which can be filled with contract addresses we want to filter for, if it contains any contract addresses the super-node will
 only send receipts that correspond to one of those contracts. `matchTxs` is a bool which, when set to true, tells the super-node to send any receipts that correspond to filtered-for
 transactions, regardless of whether or not the receipt satisfies the `topics` or `contracts` filters.
 `ethSubscription.stateFilter` has three sub-options: `off`, `addresses`, and `intermediateNodes`. Setting `off` to true tells the super-node to
 not send any state data to the subscriber; `addresses` is a string array which can be filled with ETH addresses we want to filter state for,
 if it has any addresses then the super-node will only send state leafs (accounts) corresponding to those account addresses. By default the super-node
 only sends along state leafs, if we want to receive branch and extension nodes as well `intermediateNodes` can be set to `true`.
 `ethSubscription.storageFilter` has four sub-options: `off`, `addresses`, `storageKeys`, and `intermediateNodes`. Setting `off` to true tells the super-node to
 not send any storage data to the subscriber; `addresses` is a string array which can be filled with ETH addresses we want to filter storage for,
 if it has any addresses then the super-node will only send storage nodes from the storage tries at those state addresses. `storageKeys` is another string
 array that can be filled with storage keys we want to filter storage data for. It is important to note that the storageKeys are the actual keccak256 hashes, whereas
 the addresses in the `addresses` fields are the ETH addresses and not their keccak256 hashes that serve as the actual state keys. By default the super-node
 only sends along storage leafs, if we want to receive branch and extension nodes as well `intermediateNodes` can be set to `true`.
--- a/documentation/super_node/watcher.md
+++ b/documentation/super_node/watcher.md
@ -0,0 +1,16 @@
 These are the components of a VulcanizeDB Watcher:
 * Data Fetcher/Streamer sources:
  * go-ethereum
  * bitcoind
  * btcd
  * IPFS
 * Transformers contain:
  * converter
  * publisher
  * indexer
 * Endpoints contain:
  * api
  * backend
  * filterer
  * retriever
    * ipld_server
--- a/environments/superNodeBTC.toml
+++ b/environments/superNodeBTC.toml
@ -1,6 +1,6 @@
 [database]
    name     = "vulcanize_public" # $DATABASE_NAME
-    hostname = "localhost" # &DATABASE_HOSTNAME
+    hostname = "localhost" # $DATABASE_HOSTNAME
    port     = 5432 # $DATABASE_PORT
    user     = "vdbm" # $DATABASE_USER
    password = "" # $DATABASE_PASSWORD
--- a/environments/superNodeETH.toml
+++ b/environments/superNodeETH.toml
@ -1,6 +1,6 @@
 [database]
    name     = "vulcanize_public" # $DATABASE_NAME
-    hostname = "localhost" # &DATABASE_HOSTNAME
+    hostname = "localhost" # $DATABASE_HOSTNAME
    port     = 5432 # $DATABASE_PORT
    user     = "vdbm" # $DATABASE_USER
    password = "" # $DATABASE_PASSWORD
--- a/pkg/ipfs/interfaces.go
+++ b/pkg/ipfs/interfaces.go
@ -0,0 +1,26 @@
 // VulcanizeDB
 // Copyright © 2019 Vulcanize
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU Affero General Public License for more details.
 // You should have received a copy of the GNU Affero General Public License
 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 package ipfs
 import (
 	ipld "github.com/ipfs/go-ipld-format"
 )
 // DagPutter is a general interface for a dag putter.
 // It is declared here in the ipfs package so that the chain-specific
 // publishers can hold their putters as ipfs.DagPutter values.
 type DagPutter interface {
 	// DagPut takes an IPLD node and returns a string and an error.
 	// NOTE(review): the returned string is presumably the identifier (CID)
 	// of the stored node — confirm against the implementations.
 	DagPut(n ipld.Node) (string, error)
 }
--- a/pkg/super_node/btc/publisher.go
+++ b/pkg/super_node/btc/publisher.go
@ -28,9 +28,9 @@ import (
 // IPLDPublisher satisfies the IPLDPublisher for bitcoin
 type IPLDPublisher struct {
-	HeaderPutter          shared.DagPutter
+	HeaderPutter          ipfs.DagPutter
-	TransactionPutter     shared.DagPutter
+	TransactionPutter     ipfs.DagPutter
-	TransactionTriePutter shared.DagPutter
+	TransactionTriePutter ipfs.DagPutter
 }
 // NewIPLDPublisher creates a pointer to a new Publisher which satisfies the IPLDPublisher interface
--- a/pkg/super_node/btc/subscription_config.go
+++ b/pkg/super_node/btc/subscription_config.go
@ -52,7 +52,7 @@ type TxFilter struct {
 }
 // NewBtcSubscriptionConfig is used to initialize a btc SubscriptionSettings struct with env variables
-func NewEthSubscriptionConfig() (*SubscriptionSettings, error) {
+func NewBtcSubscriptionConfig() (*SubscriptionSettings, error) {
 	sc := new(SubscriptionSettings)
 	// Below default to false, which means we do not backfill by default
 	sc.BackFill = viper.GetBool("superNode.btcSubscription.historicalData")
--- a/pkg/super_node/eth/publisher.go
+++ b/pkg/super_node/eth/publisher.go
@ -36,13 +36,13 @@ import (
 // IPLDPublisher satisfies the IPLDPublisher for ethereum
 type IPLDPublisher struct {
-	HeaderPutter          shared.DagPutter
+	HeaderPutter          ipfs.DagPutter
-	TransactionPutter     shared.DagPutter
+	TransactionPutter     ipfs.DagPutter
-	TransactionTriePutter shared.DagPutter
+	TransactionTriePutter ipfs.DagPutter
-	ReceiptPutter         shared.DagPutter
+	ReceiptPutter         ipfs.DagPutter
-	ReceiptTriePutter     shared.DagPutter
+	ReceiptTriePutter     ipfs.DagPutter
-	StatePutter           shared.DagPutter
+	StatePutter           ipfs.DagPutter
-	StoragePutter         shared.DagPutter
+	StoragePutter         ipfs.DagPutter
 }
 // NewIPLDPublisher creates a pointer to a new Publisher which satisfies the IPLDPublisher interface
--- a/pkg/super_node/shared/intefaces.go
+++ b/pkg/super_node/shared/intefaces.go
@ -18,8 +18,6 @@ package shared
 import (
 	"math/big"
 	node "github.com/ipfs/go-ipld-format"
 )
 // PayloadStreamer streams chain-specific payloads to the provided channel
@ -71,11 +69,6 @@ type ClientSubscription interface {
 	Unsubscribe()
 }
 // DagPutter is a general interface for a dag putter
 type DagPutter interface {
 	DagPut(n node.Node) (string, error)
 }
 // Cleaner is for cleaning out data from the cache within the given ranges
 type Cleaner interface {
 	Clean(rngs [][2]uint64, t DataType) error
--- a/pkg/watcher/config.go
+++ b/pkg/watcher/config.go
@ -74,7 +74,7 @@ func NewWatcherConfig() (*Config, error) {
 			return nil, err
 		}
 	case shared.Bitcoin:
-		c.SubscriptionConfig, err = btc.NewEthSubscriptionConfig()
+		c.SubscriptionConfig, err = btc.NewBtcSubscriptionConfig()
 		if err != nil {
 			return nil, err
 		}