diff --git a/indexer/base/CHANGELOG.md b/indexer/base/CHANGELOG.md new file mode 100644 index 0000000000..0c3c9d0385 --- /dev/null +++ b/indexer/base/CHANGELOG.md @@ -0,0 +1,37 @@ + + +# Changelog + +## [Unreleased] diff --git a/indexer/base/README.md b/indexer/base/README.md new file mode 100644 index 0000000000..356b99fcf3 --- /dev/null +++ b/indexer/base/README.md @@ -0,0 +1,32 @@ +# Indexer Base + +The indexer base module is designed to provide a stable, zero-dependency base layer for the built-in indexer functionality. Packages that integrate with the indexer should feel free to depend on this package without fear of any external dependencies being pulled in. + +The basic types for specifying index sources, targets and decoders are provided here. An indexing source should accept a `Listener` instance and invoke the provided callbacks in the correct order. An indexer should provide a `Listener` instance and perform indexing operations based on the data passed to its callbacks. A module that exposes logical updates in the form of `ObjectUpdate`s should implement the `IndexableModule` interface. + +## `Listener` Callback Order + +`Listener` callbacks should be called in this order + +```mermaid +sequenceDiagram + actor Source + participant Indexer + Source ->> Indexer: Initialize + Source -->> Indexer: InitializeModuleSchema + loop Block + Source ->> Indexer: StartBlock + Source ->> Indexer: OnBlockHeader + Source -->> Indexer: OnTx + Source -->> Indexer: OnEvent + Source -->> Indexer: OnKVPair + Source -->> Indexer: OnObjectUpdate + Source ->> Indexer: Commit + end +``` + +`Initialize` must be called before any other method and should only be invoked once. `InitializeModuleSchema` should be called at most once for every module with logical data. + +Sources will generally only call `InitializeModuleSchema` and `OnObjectUpdate` if they have native logical decoding capabilities. Usually, the indexer framework will provide this functionality based on `OnKVPair` data and `IndexableModule` implementations. + +`StartBlock` and `OnBlockHeader` should be called only once at the beginning of a block, and `Commit` should be called only once at the end of a block. The `OnTx`, `OnEvent`, `OnKVPair` and `OnObjectUpdate` must be called after `OnBlockHeader`, may be called multiple times within a block and indexers should not assume that the order is logical unless `InitializationData.HasEventAlignedWrites` is true. diff --git a/indexer/base/decoder.go b/indexer/base/decoder.go new file mode 100644 index 0000000000..1114661498 --- /dev/null +++ b/indexer/base/decoder.go @@ -0,0 +1,26 @@ +package indexerbase + +// DecodableModule is an interface that modules can implement to provide a ModuleDecoder. +// Usually these modules would also implement appmodule.AppModule, but that is not included +// to keep this package free of any dependencies. +type DecodableModule interface { + // ModuleDecoder returns a ModuleDecoder for the module. + ModuleDecoder() (ModuleDecoder, error) +} + +// ModuleDecoder is a struct that contains the schema and a KVDecoder for a module. +type ModuleDecoder struct { + // Schema is the schema for the module. + Schema ModuleSchema + + // KVDecoder is a function that decodes a key-value pair into an ObjectUpdate. + // If modules pass logical updates directly to the engine and don't require logical decoding of raw bytes, + // then this function should be nil. + KVDecoder KVDecoder +} + +// KVDecoder is a function that decodes a key-value pair into an ObjectUpdate. +// If the KV-pair doesn't represent an object update, the function should return false +// as the second return value. Error should only be non-nil when the decoder expected +// to parse a valid update and was unable to. +type KVDecoder = func(key, value []byte) (ObjectUpdate, bool, error) diff --git a/indexer/base/enum.go b/indexer/base/enum.go new file mode 100644 index 0000000000..b1275d8a7a --- /dev/null +++ b/indexer/base/enum.go @@ -0,0 +1,10 @@ +package indexerbase + +// EnumDefinition represents the definition of an enum type. +type EnumDefinition struct { + // Name is the name of the enum type. + Name string + + // Values is a list of distinct values that are part of the enum type. + Values []string +} diff --git a/indexer/base/field.go b/indexer/base/field.go new file mode 100644 index 0000000000..713abdeb4c --- /dev/null +++ b/indexer/base/field.go @@ -0,0 +1,19 @@ +package indexerbase + +// Field represents a field in an object type. +type Field struct { + // Name is the name of the field. + Name string + + // Kind is the basic type of the field. + Kind Kind + + // Nullable indicates whether null values are accepted for the field. + Nullable bool + + // AddressPrefix is the address prefix of the field's kind, currently only used for Bech32AddressKind. + AddressPrefix string + + // EnumDefinition is the definition of the enum type and is only valid when Kind is EnumKind. + EnumDefinition EnumDefinition +} diff --git a/indexer/base/go.mod b/indexer/base/go.mod new file mode 100644 index 0000000000..a373d4dc83 --- /dev/null +++ b/indexer/base/go.mod @@ -0,0 +1,7 @@ +module cosmossdk.io/indexer/base + +// NOTE: this go.mod should have zero dependencies and remain on go 1.12 to stay compatible +// with all known production releases of the Cosmos SDK. This is to ensure that all historical +// apps could be patched to support indexing if desired. + +go 1.12 diff --git a/indexer/base/go.sum b/indexer/base/go.sum new file mode 100644 index 0000000000..e69de29bb2 diff --git a/indexer/base/kind.go b/indexer/base/kind.go new file mode 100644 index 0000000000..064a6543be --- /dev/null +++ b/indexer/base/kind.go @@ -0,0 +1,82 @@ +package indexerbase + +// Kind represents the basic type of a field in an object. +// Each kind defines the types of go values which should be accepted +// by listeners and generated by decoders when providing entity updates. +type Kind int + +const ( + // InvalidKind indicates that an invalid type. + InvalidKind Kind = iota + + // StringKind is a string type and values of this type must be of the go type string + // or implement fmt.Stringer(). + StringKind + + // BytesKind is a bytes type and values of this type must be of the go type []byte. + BytesKind + + // Int8Kind is an int8 type and values of this type must be of the go type int8. + Int8Kind + + // Uint8Kind is a uint8 type and values of this type must be of the go type uint8. + Uint8Kind + + // Int16Kind is an int16 type and values of this type must be of the go type int16. + Int16Kind + + // Uint16Kind is a uint16 type and values of this type must be of the go type uint16. + Uint16Kind + + // Int32Kind is an int32 type and values of this type must be of the go type int32. + Int32Kind + + // Uint32Kind is a uint32 type and values of this type must be of the go type uint32. + Uint32Kind + + // Int64Kind is an int64 type and values of this type must be of the go type int64. + Int64Kind + + // Uint64Kind is a uint64 type and values of this type must be of the go type uint64. + Uint64Kind + + // IntegerKind represents an arbitrary precision integer number. Values of this type must + // be of the go type int64, string or a type that implements fmt.Stringer with the resulted string + // formatted as an integer number. + IntegerKind + + // DecimalKind represents an arbitrary precision decimal or integer number. Values of this type + // must be of the go type string or a type that implements fmt.Stringer with the resulting string + // formatted as decimal numbers with an optional fractional part. Exponential E-notation + // is supported but NaN and Infinity are not. + DecimalKind + + // BoolKind is a boolean type and values of this type must be of the go type bool. + BoolKind + + // TimeKind is a time type and values of this type must be of the go type time.Time. + TimeKind + + // DurationKind is a duration type and values of this type must be of the go type time.Duration. + DurationKind + + // Float32Kind is a float32 type and values of this type must be of the go type float32. + Float32Kind + + // Float64Kind is a float64 type and values of this type must be of the go type float64. + Float64Kind + + // Bech32AddressKind is a bech32 address type and values of this type must be of the go type string or []byte + // or a type which implements fmt.Stringer. Fields of this type are expected to set the AddressPrefix field + // in the field definition to the bech32 address prefix. + Bech32AddressKind + + // EnumKind is an enum type and values of this type must be of the go type string or implement fmt.Stringer. + // Fields of this type are expected to set the EnumDefinition field in the field definition to the enum + // definition. + EnumKind + + // JSONKind is a JSON type and values of this type can either be of go type json.RawMessage + // or any type that can be marshaled to JSON using json.Marshal. + JSONKind +) diff --git a/indexer/base/listener.go b/indexer/base/listener.go new file mode 100644 index 0000000000..f0accb6208 --- /dev/null +++ b/indexer/base/listener.go @@ -0,0 +1,119 @@ +package indexerbase + +import ( + "encoding/json" +) + +// Listener is an interface that defines methods for listening to both raw and logical blockchain data. +// It is valid for any of the methods to be nil, in which case the listener will not be called for that event. +// Listeners should understand the guarantees that are provided by the source they are listening to and +// understand which methods will or will not be called. For instance, most blockchains will not do logical +// decoding of data out of the box, so the InitializeModuleSchema and OnObjectUpdate methods will not be called. +// These methods will only be called when listening logical decoding is setup. +type Listener struct { + // Initialize is called when the listener is initialized before any other methods are called. + // The lastBlockPersisted return value should be the last block height the listener persisted if it is + // persisting block data, 0 if it is not interested in persisting block data, or -1 if it is + // persisting block data but has not persisted any data yet. This check allows the indexer + // framework to ensure that the listener has not missed blocks. + Initialize func(InitializationData) (lastBlockPersisted int64, err error) + + // StartBlock is called at the beginning of processing a block. + StartBlock func(uint64) error + + // OnBlockHeader is called when a block header is received. + OnBlockHeader func(BlockHeaderData) error + + // OnTx is called when a transaction is received. + OnTx func(TxData) error + + // OnEvent is called when an event is received. + OnEvent func(EventData) error + + // OnKVPair is called when a key-value has been written to the store for a given module. + OnKVPair func(moduleName string, key, value []byte, delete bool) error + + // Commit is called when state is committed, usually at the end of a block. Any + // indexers should commit their data when this is called and return an error if + // they are unable to commit. + Commit func() error + + // InitializeModuleSchema should be called whenever the blockchain process starts OR whenever + // logical decoding of a module is initiated. An indexer listening to this event + // should ensure that they have performed whatever initialization steps (such as database + // migrations) required to receive OnObjectUpdate events for the given module. If the + // indexer's schema is incompatible with the module's on-chain schema, the listener should return + // an error. + InitializeModuleSchema func(module string, schema ModuleSchema) error + + // OnObjectUpdate is called whenever an object is updated in a module's state. This is only called + // when logical data is available. It should be assumed that the same data in raw form + // is also passed to OnKVPair. + OnObjectUpdate func(module string, update ObjectUpdate) error +} + +// InitializationData represents initialization data that is passed to a listener. +type InitializationData struct { + // HasEventAlignedWrites indicates that the blockchain data source will emit KV-pair events + // in an order aligned with transaction, message and event callbacks. If this is true + // then indexers can assume that KV-pair data is associated with these specific transactions, messages + // and events. This may be useful for indexers which store a log of all operations (such as immutable + // or version controlled databases) so that the history log can include fine grain correlation between + // state updates and transactions, messages and events. If this value is false, then indexers should + // assume that KV-pair data occurs out of order with respect to transaction, message and event callbacks - + // the only safe assumption being that KV-pair data is associated with the block in which it was emitted. + HasEventAlignedWrites bool +} + +// BlockHeaderData represents the raw block header data that is passed to a listener. +type BlockHeaderData struct { + // Height is the height of the block. + Height uint64 + + // Bytes is the raw byte representation of the block header. + Bytes ToBytes + + // JSON is the JSON representation of the block header. It should generally be a JSON object. + JSON ToJSON +} + +// TxData represents the raw transaction data that is passed to a listener. +type TxData struct { + // TxIndex is the index of the transaction in the block. + TxIndex int32 + + // Bytes is the raw byte representation of the transaction. + Bytes ToBytes + + // JSON is the JSON representation of the transaction. It should generally be a JSON object. + JSON ToJSON +} + +// EventData represents event data that is passed to a listener. +type EventData struct { + // TxIndex is the index of the transaction in the block to which this event is associated. + // It should be set to a negative number if the event is not associated with a transaction. + // Canonically -1 should be used to represent begin block processing and -2 should be used to + // represent end block processing. + TxIndex int32 + + // MsgIndex is the index of the message in the transaction to which this event is associated. + // If TxIndex is negative, this index could correspond to the index of the message in + // begin or end block processing if such indexes exist, or it can be set to zero. + MsgIndex uint32 + + // EventIndex is the index of the event in the message to which this event is associated. + EventIndex uint32 + + // Type is the type of the event. + Type string + + // Data is the JSON representation of the event data. It should generally be a JSON object. + Data ToJSON +} + +// ToBytes is a function that lazily returns the raw byte representation of data. +type ToBytes = func() ([]byte, error) + +// ToJSON is a function that lazily returns the JSON representation of data. +type ToJSON = func() (json.RawMessage, error) diff --git a/indexer/base/module_schema.go b/indexer/base/module_schema.go new file mode 100644 index 0000000000..a93300bfd9 --- /dev/null +++ b/indexer/base/module_schema.go @@ -0,0 +1,7 @@ +package indexerbase + +// ModuleSchema represents the logical schema of a module for purposes of indexing and querying. +type ModuleSchema struct { + // ObjectTypes describe the types of objects that are part of the module's schema. + ObjectTypes []ObjectType +} diff --git a/indexer/base/object_type.go b/indexer/base/object_type.go new file mode 100644 index 0000000000..06f8adbc92 --- /dev/null +++ b/indexer/base/object_type.go @@ -0,0 +1,23 @@ +package indexerbase + +// ObjectType describes an object type a module schema. +type ObjectType struct { + // Name is the name of the object. + Name string + + // KeyFields is a list of fields that make up the primary key of the object. + // It can be empty in which case indexers should assume that this object is + // a singleton and ony has one value. + KeyFields []Field + + // ValueFields is a list of fields that are not part of the primary key of the object. + // It can be empty in the case where all fields are part of the primary key. + ValueFields []Field + + // RetainDeletions is a flag that indicates whether the indexer should retain + // deleted rows in the database and flag them as deleted rather than actually + // deleting the row. For many types of data in state, the data is deleted even + // though it is still valid in order to save space. Indexers will want to have + // the option of retaining such data and distinguishing from other "true" deletions. + RetainDeletions bool +} diff --git a/indexer/base/object_update.go b/indexer/base/object_update.go new file mode 100644 index 0000000000..05b5962930 --- /dev/null +++ b/indexer/base/object_update.go @@ -0,0 +1,40 @@ +package indexerbase + +// ObjectUpdate represents an update operation on an object in a module's state. +type ObjectUpdate struct { + // TypeName is the name of the object type in the module's schema. + TypeName string + + // Key returns the value of the primary key of the object and must conform to these constraints with respect + // that the schema that is defined for the object: + // - if key represents a single field, then the value must be valid for the first field in that + // field list. For instance, if there is one field in the key of type String, then the value must be of + // type string + // - if key represents multiple fields, then the value must be a slice of values where each value is valid + // for the corresponding field in the field list. For instance, if there are two fields in the key of + // type String, String, then the value must be a slice of two strings. + // If the key has no fields, meaning that this is a singleton object, then this value is ignored and can be nil. + Key interface{} + + // Value returns the non-primary key fields of the object and can either conform to the same constraints + // as ObjectUpdate.Key or it may be and instance of ValueUpdates. ValueUpdates can be used as a performance + // optimization to avoid copying the values of the object into the update and/or to omit unchanged fields. + // If this is a delete operation, then this value is ignored and can be nil. + Value interface{} + + // Delete is a flag that indicates whether this update is a delete operation. If true, then the Value field + // is ignored and can be nil. + Delete bool +} + +// ValueUpdates is an interface that represents the value fields of an object update. fields that +// were not updated may be excluded from the update. Consumers should be aware that implementations +// may not filter out fields that were unchanged. However, if a field is omitted from the update +// it should be considered unchanged. +type ValueUpdates interface { + // Iterate iterates over the fields and values in the object update. The function should return + // true to continue iteration or false to stop iteration. Each field value should conform + // to the requirements of that field's type in the schema. Iterate returns an error if + // it was unable to decode the values properly (which could be the case in lazy evaluation). + Iterate(func(col string, value interface{}) bool) error +}