From f843f1a4789c9d753415b104f2e4d265656a067b Mon Sep 17 00:00:00 2001 From: Aaron Craelius Date: Fri, 30 Aug 2024 17:23:17 -0400 Subject: [PATCH] feat(collections): support indexing (#20704) Co-authored-by: Facundo Medica <14063057+facundomedica@users.noreply.github.com> --- collections/CHANGELOG.md | 1 + collections/codec/indexing.go | 102 +++++++++++++++++ collections/collections.go | 22 ++++ collections/go.mod | 3 +- collections/go.sum | 2 + collections/indexing.go | 183 ++++++++++++++++++++++++++++++ collections/map.go | 5 + x/accounts/defaults/lockup/go.mod | 5 +- 8 files changed, 321 insertions(+), 2 deletions(-) create mode 100644 collections/codec/indexing.go create mode 100644 collections/indexing.go diff --git a/collections/CHANGELOG.md b/collections/CHANGELOG.md index 577ed12693..6be747f2f4 100644 --- a/collections/CHANGELOG.md +++ b/collections/CHANGELOG.md @@ -38,6 +38,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ * [#19343](https://github.com/cosmos/cosmos-sdk/pull/19343) Simplify IndexedMap creation by allowing to infer indexes through reflection. * [#19861](https://github.com/cosmos/cosmos-sdk/pull/19861) Add `NewJSONValueCodec` value codec as an alternative for `codec.CollValue` from the SDK for non protobuf types. * [#21090](https://github.com/cosmos/cosmos-sdk/pull/21090) Introduces `Quad`, a composite key with four keys. +* [#20704](https://github.com/cosmos/cosmos-sdk/pull/20704) Add `ModuleCodec` method to `Schema` and `HasSchemaCodec` interface in order to support `cosmossdk.io/schema` compatible indexing. 
## [v0.4.0](https://github.com/cosmos/cosmos-sdk/releases/tag/collections%2Fv0.4.0) diff --git a/collections/codec/indexing.go b/collections/codec/indexing.go new file mode 100644 index 0000000000..26ff651cc8 --- /dev/null +++ b/collections/codec/indexing.go @@ -0,0 +1,102 @@ +package codec + +import ( + "encoding/json" + "fmt" + + "cosmossdk.io/schema" +) + +// HasSchemaCodec is an interface that all codecs should implement in order +// to properly support indexing. It is not required by KeyCodec or ValueCodec +// in order to preserve backwards compatibility, but a future version of collections +// may make it required and all codecs should aim to implement it. If it is not +// implemented, fallback defaults will be used for indexing that may be sub-optimal. +// +// Implementations of HasSchemaCodec should test that they are conformant using +// schema.ValidateObjectKey or schema.ValidateObjectValue depending on whether +// the codec is a KeyCodec or ValueCodec respectively. +type HasSchemaCodec[T any] interface { + // SchemaCodec returns the schema codec for the collections codec. + SchemaCodec() (SchemaCodec[T], error) +} + +// SchemaCodec is a codec that supports converting collection codec values to and +// from schema codec values. +type SchemaCodec[T any] struct { + // Fields are the schema fields that the codec represents. If this is empty, + // it will be assumed that this codec represents no value (such as an item key + // or key set value). + Fields []schema.Field + + // ToSchemaType converts a codec value of type T to a value corresponding to + // a schema object key or value (depending on whether this is a key or value + // codec). The returned value should pass validation with schema.ValidateObjectKey + // or schema.ValidateObjectValue with the fields specified in Fields. + // If this function is nil, it will be assumed that T already represents a + // value that conforms to a schema value without any further conversion. 
+ ToSchemaType func(T) (any, error) + + // FromSchemaType converts a schema object key or value to T. + // If this function is nil, it will be assumed that T already represents a + // value that conforms to a schema value without any further conversion. + FromSchemaType func(any) (T, error) +} + +// KeySchemaCodec gets the schema codec for the provided KeyCodec either +// by casting to HasSchemaCodec or returning a fallback codec. +func KeySchemaCodec[K any](cdc KeyCodec[K]) (SchemaCodec[K], error) { + if indexable, ok := cdc.(HasSchemaCodec[K]); ok { + return indexable.SchemaCodec() + } else { + return FallbackSchemaCodec[K](), nil + } +} + +// ValueSchemaCodec gets the schema codec for the provided ValueCodec either +// by casting to HasSchemaCodec or returning a fallback codec. +func ValueSchemaCodec[V any](cdc ValueCodec[V]) (SchemaCodec[V], error) { + if indexable, ok := cdc.(HasSchemaCodec[V]); ok { + return indexable.SchemaCodec() + } else { + return FallbackSchemaCodec[V](), nil + } +} + +// FallbackSchemaCodec returns a fallback schema codec for T when one isn't explicitly +// specified with HasSchemaCodec. It maps all simple types directly to schema kinds +// and converts everything else to JSON. 
+func FallbackSchemaCodec[T any]() SchemaCodec[T] { + var t T + kind := schema.KindForGoValue(t) + if err := kind.Validate(); err == nil { + return SchemaCodec[T]{ + Fields: []schema.Field{{ + // we don't set any name so that this can be set to a good default by the caller + Name: "", + Kind: kind, + }}, + // these can be nil because T maps directly to a schema value for this kind + ToSchemaType: nil, + FromSchemaType: nil, + } + } else { + // we default to encoding everything to JSON + return SchemaCodec[T]{ + Fields: []schema.Field{{Kind: schema.JSONKind}}, + ToSchemaType: func(t T) (any, error) { + bz, err := json.Marshal(t) + return json.RawMessage(bz), err + }, + FromSchemaType: func(a any) (T, error) { + var t T + bz, ok := a.(json.RawMessage) + if !ok { + return t, fmt.Errorf("expected json.RawMessage, got %T", a) + } + err := json.Unmarshal(bz, &t) + return t, err + }, + } + } +} diff --git a/collections/collections.go b/collections/collections.go index 9de3bbc382..24eca492fc 100644 --- a/collections/collections.go +++ b/collections/collections.go @@ -6,6 +6,8 @@ import ( "io" "math" + "cosmossdk.io/schema" + "cosmossdk.io/collections/codec" ) @@ -90,6 +92,24 @@ type Collection interface { ValueCodec() codec.UntypedValueCodec genesisHandler + + // schemaCodec returns the schema codec for this collection. + schemaCodec() (*collectionSchemaCodec, error) + + // isSecondaryIndex indicates that this collection represents a secondary index + // in the schema and should be excluded from the module's user facing schema. + isSecondaryIndex() bool +} + +// collectionSchemaCodec maps a collection to a schema object type and provides +// decoders and encoders to and from schema values and raw kv-store bytes. 
+type collectionSchemaCodec struct { + coll Collection + objectType schema.ObjectType + keyDecoder func([]byte) (any, error) + valueDecoder func([]byte) (any, error) + keyEncoder func(any) ([]byte, error) + valueEncoder func(any) ([]byte, error) } // Prefix defines a segregation bytes namespace for specific collections objects. @@ -157,3 +177,5 @@ func (c collectionImpl[K, V]) exportGenesis(ctx context.Context, w io.Writer) er } func (c collectionImpl[K, V]) defaultGenesis(w io.Writer) error { return c.m.defaultGenesis(w) } + +func (c collectionImpl[K, V]) isSecondaryIndex() bool { return c.m.isSecondaryIndex } diff --git a/collections/go.mod b/collections/go.mod index 12cafa6d71..3eaa69315d 100644 --- a/collections/go.mod +++ b/collections/go.mod @@ -5,14 +5,15 @@ go 1.23 require ( cosmossdk.io/core v1.0.0 cosmossdk.io/core/testing v0.0.0-00010101000000-000000000000 + cosmossdk.io/schema v0.2.0 github.com/stretchr/testify v1.9.0 + github.com/tidwall/btree v1.7.0 pgregory.net/rapid v1.1.0 ) require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/tidwall/btree v1.7.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/collections/go.sum b/collections/go.sum index f59be83bdb..a616ad38d9 100644 --- a/collections/go.sum +++ b/collections/go.sum @@ -1,3 +1,5 @@ +cosmossdk.io/schema v0.2.0 h1:UH5CR1DqUq8yP+5Np8PbvG4YX0zAUsTN2Qk6yThmfMk= +cosmossdk.io/schema v0.2.0/go.mod h1:RDAhxIeNB4bYqAlF4NBJwRrgtnciMcyyg0DOKnhNZQQ= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/collections/indexing.go b/collections/indexing.go new file mode 100644 index 0000000000..6fbf5aa048 --- /dev/null +++ 
b/collections/indexing.go @@ -0,0 +1,183 @@ +package collections + +import ( + "bytes" + "fmt" + "strings" + + "cosmossdk.io/schema" + "github.com/tidwall/btree" + + "cosmossdk.io/collections/codec" +) + +// IndexingOptions are indexing options for the collections schema. +type IndexingOptions struct { + + // RetainDeletionsFor is the list of collections to retain deletions for. + RetainDeletionsFor []string +} + +// ModuleCodec returns the ModuleCodec for this schema for the provided options. +func (s Schema) ModuleCodec(opts IndexingOptions) (schema.ModuleCodec, error) { + decoder := moduleDecoder{ + collectionLookup: &btree.Map[string, *collectionSchemaCodec]{}, + } + + retainDeletions := make(map[string]bool) + for _, collName := range opts.RetainDeletionsFor { + retainDeletions[collName] = true + } + + var types []schema.Type + for _, collName := range s.collectionsOrdered { + coll := s.collectionsByName[collName] + + // skip secondary indexes + if coll.isSecondaryIndex() { + continue + } + + cdc, err := coll.schemaCodec() + if err != nil { + return schema.ModuleCodec{}, err + } + + if retainDeletions[coll.GetName()] { + cdc.objectType.RetainDeletions = true + } + + types = append(types, cdc.objectType) + + decoder.collectionLookup.Set(string(coll.GetPrefix()), cdc) + } + + modSchema, err := schema.CompileModuleSchema(types...) 
+ if err != nil { + return schema.ModuleCodec{}, err + } + + return schema.ModuleCodec{ + Schema: modSchema, + KVDecoder: decoder.decodeKV, + }, nil +} + +type moduleDecoder struct { + // collectionLookup lets us efficiently look up the correct collection based on raw key bytes + collectionLookup *btree.Map[string, *collectionSchemaCodec] +} + +func (m moduleDecoder) decodeKV(update schema.KVPairUpdate) ([]schema.ObjectUpdate, error) { + key := update.Key + ks := string(key) + var cd *collectionSchemaCodec + // we look for the collection whose prefix is less than this key + m.collectionLookup.Descend(ks, func(prefix string, cur *collectionSchemaCodec) bool { + bytesPrefix := cur.coll.GetPrefix() + if bytes.HasPrefix(key, bytesPrefix) { + cd = cur + return true + } + return false + }) + if cd == nil { + return nil, nil + } + + return cd.decodeKVPair(update) +} + +func (c collectionSchemaCodec) decodeKVPair(update schema.KVPairUpdate) ([]schema.ObjectUpdate, error) { + // strip prefix + key := update.Key + key = key[len(c.coll.GetPrefix()):] + + k, err := c.keyDecoder(key) + if err != nil { + return []schema.ObjectUpdate{ + {TypeName: c.coll.GetName()}, + }, err + + } + + if update.Remove { + return []schema.ObjectUpdate{ + {TypeName: c.coll.GetName(), Key: k, Delete: true}, + }, nil + } + + v, err := c.valueDecoder(update.Value) + if err != nil { + return []schema.ObjectUpdate{ + {TypeName: c.coll.GetName(), Key: k}, + }, err + } + + return []schema.ObjectUpdate{ + {TypeName: c.coll.GetName(), Key: k, Value: v}, + }, nil +} + +func (c collectionImpl[K, V]) schemaCodec() (*collectionSchemaCodec, error) { + res := &collectionSchemaCodec{ + coll: c, + } + res.objectType.Name = c.GetName() + + keyDecoder, err := codec.KeySchemaCodec(c.m.kc) + if err != nil { + return nil, err + } + res.objectType.KeyFields = keyDecoder.Fields + res.keyDecoder = func(i []byte) (any, error) { + _, x, err := c.m.kc.Decode(i) + if err != nil { + return nil, err + } + return 
keyDecoder.ToSchemaType(x) + } + ensureFieldNames(c.m.kc, "key", res.objectType.KeyFields) + + valueDecoder, err := codec.ValueSchemaCodec(c.m.vc) + if err != nil { + return nil, err + } + res.objectType.ValueFields = valueDecoder.Fields + res.valueDecoder = func(i []byte) (any, error) { + x, err := c.m.vc.Decode(i) + if err != nil { + return nil, err + } + return valueDecoder.ToSchemaType(x) + } + ensureFieldNames(c.m.vc, "value", res.objectType.ValueFields) + + return res, nil +} + +// ensureFieldNames makes sure that all fields have valid names - either the +// names were specified by user or they get filled +func ensureFieldNames(x any, defaultName string, cols []schema.Field) { + var names []string = nil + if hasName, ok := x.(interface{ Name() string }); ok { + name := hasName.Name() + if name != "" { + names = strings.Split(hasName.Name(), ",") + } + } + for i, col := range cols { + if names != nil && i < len(names) { + col.Name = names[i] + } else { + if col.Name == "" { + if i == 0 && len(cols) == 1 { + col.Name = defaultName + } else { + col.Name = fmt.Sprintf("%s%d", defaultName, i+1) + } + } + } + cols[i] = col + } +} diff --git a/collections/map.go b/collections/map.go index 0b9b247aa2..360d96feaf 100644 --- a/collections/map.go +++ b/collections/map.go @@ -20,6 +20,11 @@ type Map[K, V any] struct { sa func(context.Context) store.KVStore prefix []byte name string + + // isSecondaryIndex indicates that this map represents a secondary index + // on another collection and that it should be skipped when generating + // a user facing schema + isSecondaryIndex bool } // NewMap returns a Map given a StoreKey, a Prefix, human-readable name and the relative value and key encoders. 
diff --git a/x/accounts/defaults/lockup/go.mod b/x/accounts/defaults/lockup/go.mod index d2d1b8f0eb..70b590b78d 100644 --- a/x/accounts/defaults/lockup/go.mod +++ b/x/accounts/defaults/lockup/go.mod @@ -13,7 +13,10 @@ require ( github.com/cosmos/gogoproto v1.7.0 ) -require github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect +require ( + cosmossdk.io/schema v0.2.0 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect +) require ( buf.build/gen/go/cometbft/cometbft/protocolbuffers/go v1.34.2-20240701160653-fedbb9acfd2f.2 // indirect