2021-03-02 00:47:21 +00:00
|
|
|
package splitstore
|
|
|
|
|
|
|
|
import (
|
|
|
|
"path/filepath"
|
2021-06-25 16:41:31 +00:00
|
|
|
"sync"
|
2021-03-02 00:47:21 +00:00
|
|
|
|
|
|
|
"golang.org/x/xerrors"
|
|
|
|
|
|
|
|
cid "github.com/ipfs/go-cid"
|
|
|
|
)
|
|
|
|
|
|
|
|
// MarkSet is a utility to keep track of seen CID, and later query for them.
|
|
|
|
//
|
|
|
|
// * If the expected dataset is large, it can be backed by a datastore (e.g. bbolt).
|
|
|
|
// * If a probabilistic result is acceptable, it can be backed by a bloom filter (default).
|
|
|
|
type MarkSet interface {
|
|
|
|
Mark(cid.Cid) error
|
|
|
|
Has(cid.Cid) (bool, error)
|
|
|
|
Close() error
|
|
|
|
}
|
|
|
|
|
|
|
|
// markBytes is deliberately a non-nil empty byte slice for serialization.
|
|
|
|
var markBytes = make([]byte, 0)
|
|
|
|
|
|
|
|
type MarkSetEnv interface {
|
|
|
|
Create(name string, sizeHint int64) (MarkSet, error)
|
|
|
|
Close() error
|
|
|
|
}
|
|
|
|
|
|
|
|
func OpenMarkSetEnv(path string, mtype string) (MarkSetEnv, error) {
|
|
|
|
switch mtype {
|
|
|
|
case "", "bloom":
|
2021-06-25 07:07:45 +00:00
|
|
|
return NewBloomMarkSetEnv(false)
|
|
|
|
case "bloomts":
|
|
|
|
return NewBloomMarkSetEnv(true)
|
2021-06-25 16:41:31 +00:00
|
|
|
case "map":
|
|
|
|
return NewMapMarkSetEnv(false)
|
|
|
|
case "mapts":
|
|
|
|
return NewMapMarkSetEnv(true)
|
2021-03-02 00:47:21 +00:00
|
|
|
case "bolt":
|
|
|
|
return NewBoltMarkSetEnv(filepath.Join(path, "markset.bolt"))
|
|
|
|
default:
|
|
|
|
return nil, xerrors.Errorf("unknown mark set type %s", mtype)
|
|
|
|
}
|
|
|
|
}
|
2021-06-25 16:41:31 +00:00
|
|
|
|
|
|
|
type MapMarkSetEnv struct {
|
|
|
|
ts bool
|
|
|
|
}
|
|
|
|
|
|
|
|
var _ MarkSetEnv = (*MapMarkSetEnv)(nil)
|
|
|
|
|
|
|
|
type MapMarkSet struct {
|
2021-07-02 05:03:54 +00:00
|
|
|
mx sync.Mutex
|
|
|
|
set map[string]struct{}
|
2021-06-25 16:41:31 +00:00
|
|
|
|
|
|
|
ts bool
|
|
|
|
}
|
|
|
|
|
|
|
|
var _ MarkSet = (*MapMarkSet)(nil)
|
|
|
|
|
|
|
|
func NewMapMarkSetEnv(ts bool) (*MapMarkSetEnv, error) {
|
|
|
|
return &MapMarkSetEnv{ts: ts}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *MapMarkSetEnv) Create(name string, sizeHint int64) (MarkSet, error) {
|
|
|
|
return &MapMarkSet{
|
2021-07-02 05:03:54 +00:00
|
|
|
set: make(map[string]struct{}),
|
|
|
|
ts: e.ts,
|
2021-06-25 16:41:31 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *MapMarkSetEnv) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MapMarkSet) Mark(cid cid.Cid) error {
|
|
|
|
if s.ts {
|
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
|
|
|
}
|
|
|
|
|
2021-07-02 05:03:54 +00:00
|
|
|
s.set[string(cid.Hash())] = struct{}{}
|
2021-06-25 16:41:31 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) {
|
|
|
|
if s.ts {
|
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
|
|
|
}
|
|
|
|
|
2021-07-02 05:03:54 +00:00
|
|
|
_, ok := s.set[string(cid.Hash())]
|
2021-06-25 16:41:31 +00:00
|
|
|
return ok, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MapMarkSet) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|