2021-07-06 11:44:40 +00:00
|
|
|
package splitstore
|
|
|
|
|
|
|
|
import (
|
2022-01-28 13:41:33 +00:00
|
|
|
"bufio"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2021-07-06 11:44:40 +00:00
|
|
|
"sync"
|
|
|
|
|
2022-06-15 10:06:22 +00:00
|
|
|
"github.com/ipfs/go-cid"
|
2022-06-14 15:00:51 +00:00
|
|
|
"golang.org/x/xerrors"
|
2021-07-06 11:44:40 +00:00
|
|
|
)
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
// MapMarkSetEnv is a MarkSetEnv that hands out map-backed mark sets.
type MapMarkSetEnv struct {
	// path is the directory under which the per-mark-set files are located;
	// individual mark set files are addressed as path/<name>.
	path string
}
|
2021-07-06 11:44:40 +00:00
|
|
|
|
|
|
|
// Compile-time check that *MapMarkSetEnv satisfies the MarkSetEnv interface.
var _ MarkSetEnv = (*MapMarkSetEnv)(nil)
|
|
|
|
|
|
|
|
// MapMarkSet is a MarkSet that keeps its members in an in-memory map and,
// while a critical section is active, additionally appends every newly
// marked key to a file so that the set can be rebuilt later.
type MapMarkSet struct {
	// mx serialises access to all of the fields below.
	mx sync.RWMutex
	// set holds the marked keys (the raw multihash bytes of each CID, as a
	// string). It is nil once the mark set has been closed.
	set map[string]struct{}

	// persist reports whether marks are currently being mirrored to file.
	persist bool
	// file is the open on-disk mark set file while persist is true.
	file *os.File
	// buf is the buffered writer wrapping file.
	buf *bufio.Writer

	// path is the location of the on-disk mark set file.
	path string
}
|
|
|
|
|
|
|
|
// Compile-time check that *MapMarkSet satisfies the MarkSet interface.
var _ MarkSet = (*MapMarkSet)(nil)
|
2021-07-30 06:42:20 +00:00
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
func NewMapMarkSetEnv(path string) (*MapMarkSetEnv, error) {
|
|
|
|
msPath := filepath.Join(path, "markset.map")
|
|
|
|
err := os.MkdirAll(msPath, 0755) //nolint:gosec
|
|
|
|
if err != nil {
|
|
|
|
return nil, xerrors.Errorf("error creating markset directory: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return &MapMarkSetEnv{path: msPath}, nil
|
2021-07-06 11:44:40 +00:00
|
|
|
}
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
func (e *MapMarkSetEnv) New(name string, sizeHint int64) (MarkSet, error) {
|
|
|
|
path := filepath.Join(e.path, name)
|
2021-07-06 11:44:40 +00:00
|
|
|
return &MapMarkSet{
|
2022-01-28 13:41:33 +00:00
|
|
|
set: make(map[string]struct{}, sizeHint),
|
|
|
|
path: path,
|
2021-07-06 11:44:40 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
func (e *MapMarkSetEnv) Recover(name string) (MarkSet, error) {
|
|
|
|
path := filepath.Join(e.path, name)
|
|
|
|
s := &MapMarkSet{
|
|
|
|
set: make(map[string]struct{}),
|
|
|
|
path: path,
|
|
|
|
}
|
|
|
|
|
|
|
|
in, err := os.Open(path)
|
|
|
|
if err != nil {
|
|
|
|
return nil, xerrors.Errorf("error opening markset file for read: %w", err)
|
|
|
|
}
|
2022-01-30 13:43:52 +00:00
|
|
|
defer in.Close() //nolint:errcheck
|
2022-01-28 13:41:33 +00:00
|
|
|
|
|
|
|
// wrap a buffered reader to make this faster
|
|
|
|
buf := bufio.NewReader(in)
|
|
|
|
for {
|
|
|
|
var sz byte
|
|
|
|
if sz, err = buf.ReadByte(); err != nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
key := make([]byte, int(sz))
|
2022-01-30 13:11:18 +00:00
|
|
|
if _, err = io.ReadFull(buf, key); err != nil {
|
2022-01-28 13:41:33 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
s.set[string(key)] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != io.EOF {
|
|
|
|
return nil, xerrors.Errorf("error reading markset file: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
file, err := os.OpenFile(s.path, os.O_WRONLY|os.O_APPEND, 0)
|
|
|
|
if err != nil {
|
|
|
|
return nil, xerrors.Errorf("error opening markset file for write: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
s.persist = true
|
|
|
|
s.file = file
|
|
|
|
s.buf = bufio.NewWriter(file)
|
|
|
|
|
|
|
|
return s, nil
|
|
|
|
}
|
|
|
|
|
2021-07-06 11:44:40 +00:00
|
|
|
func (e *MapMarkSetEnv) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
func (s *MapMarkSet) BeginCriticalSection() error {
|
2022-01-25 14:31:45 +00:00
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
2021-07-06 11:44:40 +00:00
|
|
|
|
2021-07-08 07:18:43 +00:00
|
|
|
if s.set == nil {
|
|
|
|
return errMarkSetClosed
|
|
|
|
}
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
if s.persist {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
file, err := os.OpenFile(s.path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
|
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("error opening markset file: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// wrap a buffered writer to make this faster
|
|
|
|
s.buf = bufio.NewWriter(file)
|
|
|
|
for key := range s.set {
|
|
|
|
if err := s.writeKey([]byte(key), false); err != nil {
|
|
|
|
_ = file.Close()
|
|
|
|
s.buf = nil
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := s.buf.Flush(); err != nil {
|
|
|
|
_ = file.Close()
|
|
|
|
s.buf = nil
|
|
|
|
return xerrors.Errorf("error flushing markset file buffer: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
s.file = file
|
|
|
|
s.persist = true
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MapMarkSet) EndCriticalSection() {
|
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
|
|
|
|
|
|
|
if !s.persist {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
_ = s.file.Close()
|
|
|
|
_ = os.Remove(s.path)
|
|
|
|
s.file = nil
|
|
|
|
s.buf = nil
|
|
|
|
s.persist = false
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MapMarkSet) Mark(c cid.Cid) error {
|
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
|
|
|
|
|
|
|
if s.set == nil {
|
|
|
|
return errMarkSetClosed
|
|
|
|
}
|
|
|
|
|
|
|
|
hash := c.Hash()
|
|
|
|
s.set[string(hash)] = struct{}{}
|
|
|
|
|
|
|
|
if s.persist {
|
2022-01-30 10:10:08 +00:00
|
|
|
if err := s.writeKey(hash, true); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := s.file.Sync(); err != nil {
|
|
|
|
return xerrors.Errorf("error syncing markset: %w", err)
|
|
|
|
}
|
2022-01-28 13:41:33 +00:00
|
|
|
}
|
|
|
|
|
2021-07-06 11:44:40 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-01-30 09:27:24 +00:00
|
|
|
func (s *MapMarkSet) MarkMany(batch []cid.Cid) error {
|
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
|
|
|
|
|
|
|
if s.set == nil {
|
|
|
|
return errMarkSetClosed
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, c := range batch {
|
|
|
|
hash := c.Hash()
|
|
|
|
s.set[string(hash)] = struct{}{}
|
|
|
|
|
|
|
|
if s.persist {
|
|
|
|
if err := s.writeKey(hash, false); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if s.persist {
|
2022-01-30 10:10:08 +00:00
|
|
|
if err := s.buf.Flush(); err != nil {
|
|
|
|
return xerrors.Errorf("error flushing markset buffer to disk: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := s.file.Sync(); err != nil {
|
|
|
|
return xerrors.Errorf("error syncing markset: %w", err)
|
|
|
|
}
|
2022-01-30 09:27:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-06 11:44:40 +00:00
|
|
|
func (s *MapMarkSet) Has(cid cid.Cid) (bool, error) {
|
2022-01-25 14:31:45 +00:00
|
|
|
s.mx.RLock()
|
|
|
|
defer s.mx.RUnlock()
|
2021-07-06 11:44:40 +00:00
|
|
|
|
2021-07-08 07:18:43 +00:00
|
|
|
if s.set == nil {
|
|
|
|
return false, errMarkSetClosed
|
|
|
|
}
|
|
|
|
|
2021-07-06 11:44:40 +00:00
|
|
|
_, ok := s.set[string(cid.Hash())]
|
|
|
|
return ok, nil
|
|
|
|
}
|
|
|
|
|
2021-07-30 19:07:45 +00:00
|
|
|
func (s *MapMarkSet) Visit(c cid.Cid) (bool, error) {
|
2022-01-25 14:31:45 +00:00
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
2021-07-09 01:26:36 +00:00
|
|
|
|
2021-07-30 19:07:45 +00:00
|
|
|
if s.set == nil {
|
2021-07-30 06:42:20 +00:00
|
|
|
return false, errMarkSetClosed
|
|
|
|
}
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
hash := c.Hash()
|
|
|
|
key := string(hash)
|
2021-07-30 19:07:45 +00:00
|
|
|
if _, ok := s.set[key]; ok {
|
2021-07-30 06:42:20 +00:00
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
2021-07-30 19:07:45 +00:00
|
|
|
s.set[key] = struct{}{}
|
2022-01-28 13:41:33 +00:00
|
|
|
|
|
|
|
if s.persist {
|
|
|
|
if err := s.writeKey(hash, true); err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2022-01-30 10:10:08 +00:00
|
|
|
if err := s.file.Sync(); err != nil {
|
|
|
|
return false, xerrors.Errorf("error syncing markset: %w", err)
|
|
|
|
}
|
2022-01-28 13:41:33 +00:00
|
|
|
}
|
|
|
|
|
2021-07-30 06:42:20 +00:00
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2021-07-30 19:07:45 +00:00
|
|
|
func (s *MapMarkSet) Close() error {
|
2022-01-25 14:31:45 +00:00
|
|
|
s.mx.Lock()
|
|
|
|
defer s.mx.Unlock()
|
|
|
|
|
2022-01-28 13:41:33 +00:00
|
|
|
if s.set == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-30 19:07:45 +00:00
|
|
|
s.set = nil
|
2022-01-28 13:41:33 +00:00
|
|
|
|
|
|
|
if s.file != nil {
|
|
|
|
if err := s.file.Close(); err != nil {
|
|
|
|
log.Warnf("error closing markset file: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if !s.persist {
|
|
|
|
if err := os.Remove(s.path); err != nil {
|
|
|
|
log.Warnf("error removing markset file: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MapMarkSet) writeKey(k []byte, flush bool) error {
|
|
|
|
if err := s.buf.WriteByte(byte(len(k))); err != nil {
|
|
|
|
return xerrors.Errorf("error writing markset key length to disk: %w", err)
|
|
|
|
}
|
|
|
|
if _, err := s.buf.Write(k); err != nil {
|
|
|
|
return xerrors.Errorf("error writing markset key to disk: %w", err)
|
|
|
|
}
|
|
|
|
if flush {
|
|
|
|
if err := s.buf.Flush(); err != nil {
|
|
|
|
return xerrors.Errorf("error flushing markset buffer to disk: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-30 06:42:20 +00:00
|
|
|
return nil
|
|
|
|
}
|