lotus/extern/sector-storage/stores/remote.go

408 lines
10 KiB
Go
Raw Normal View History

2020-03-23 11:40:02 +00:00
package stores
import (
"context"
"encoding/json"
"io/ioutil"
"math/bits"
2020-03-23 11:40:02 +00:00
"mime"
"net/http"
"net/url"
2020-03-23 11:40:02 +00:00
"os"
gopath "path"
2020-07-24 17:39:25 +00:00
"path/filepath"
2020-03-23 11:40:02 +00:00
"sort"
"sync"
"github.com/filecoin-project/lotus/extern/sector-storage/fsutil"
"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
"github.com/filecoin-project/lotus/extern/sector-storage/tarutil"
2020-03-23 11:40:02 +00:00
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/specs-storage/storage"
2020-03-23 11:40:02 +00:00
2020-08-16 10:40:35 +00:00
"github.com/hashicorp/go-multierror"
files "github.com/ipfs/go-ipfs-files"
"golang.org/x/xerrors"
2020-03-23 11:40:02 +00:00
)
2020-07-24 17:39:25 +00:00
// FetchTempSubdir is the name of the directory, created next to a fetch
// destination by tempFetchDest, in which in-progress downloads are staged.
var FetchTempSubdir = "fetching"
2020-03-23 11:40:02 +00:00
type Remote struct {
local *Local
index SectorIndex
auth http.Header
2020-07-24 14:43:41 +00:00
limit chan struct{}
2020-05-01 18:04:21 +00:00
fetchLk sync.Mutex
2020-04-29 15:58:55 +00:00
fetching map[abi.SectorID]chan struct{}
2020-03-23 11:40:02 +00:00
}
2020-09-06 16:54:00 +00:00
// RemoveCopies forwards the request unchanged to the local store's
// RemoveCopies. Nothing is done on remote stores.
func (r *Remote) RemoveCopies(ctx context.Context, s abi.SectorID, types storiface.SectorFileType) error {
	// TODO: do this on remotes too
	//  (not that we really need to do that since it's always called by the
	//  worker which pulled the copy)
	err := r.local.RemoveCopies(ctx, s, types)
	return err
}
2020-07-24 14:43:41 +00:00
func NewRemote(local *Local, index SectorIndex, auth http.Header, fetchLimit int) *Remote {
2020-03-23 11:40:02 +00:00
return &Remote{
local: local,
index: index,
auth: auth,
2020-04-29 15:58:55 +00:00
2020-07-24 14:43:41 +00:00
limit: make(chan struct{}, fetchLimit),
2020-04-29 15:58:55 +00:00
fetching: map[abi.SectorID]chan struct{}{},
2020-03-23 11:40:02 +00:00
}
}
func (r *Remote) AcquireSector(ctx context.Context, s storage.SectorRef, existing storiface.SectorFileType, allocate storiface.SectorFileType, pathType storiface.PathType, op storiface.AcquireMode) (storiface.SectorPaths, storiface.SectorPaths, error) {
2020-03-23 11:40:02 +00:00
if existing|allocate != existing^allocate {
2020-09-06 16:54:00 +00:00
return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.New("can't both find and allocate a sector")
2020-03-23 11:40:02 +00:00
}
2020-04-29 15:58:55 +00:00
for {
r.fetchLk.Lock()
c, locked := r.fetching[s.ID]
2020-04-29 15:58:55 +00:00
if !locked {
r.fetching[s.ID] = make(chan struct{})
2020-04-29 15:58:55 +00:00
r.fetchLk.Unlock()
break
}
r.fetchLk.Unlock()
select {
case <-c:
continue
case <-ctx.Done():
2020-09-06 16:54:00 +00:00
return storiface.SectorPaths{}, storiface.SectorPaths{}, ctx.Err()
2020-04-29 15:58:55 +00:00
}
}
defer func() {
r.fetchLk.Lock()
close(r.fetching[s.ID])
delete(r.fetching, s.ID)
2020-04-29 15:58:55 +00:00
r.fetchLk.Unlock()
}()
2020-03-23 11:40:02 +00:00
paths, stores, err := r.local.AcquireSector(ctx, s, existing, allocate, pathType, op)
2020-03-23 11:40:02 +00:00
if err != nil {
2020-09-06 16:54:00 +00:00
return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.Errorf("local acquire error: %w", err)
2020-03-23 11:40:02 +00:00
}
2020-09-06 16:54:00 +00:00
var toFetch storiface.SectorFileType
for _, fileType := range storiface.PathTypes {
2020-08-04 14:20:59 +00:00
if fileType&existing == 0 {
continue
}
2020-09-06 16:54:00 +00:00
if storiface.PathByType(paths, fileType) == "" {
2020-08-04 14:20:59 +00:00
toFetch |= fileType
}
}
apaths, ids, err := r.local.AcquireSector(ctx, s, storiface.FTNone, toFetch, pathType, op)
2020-08-04 14:20:59 +00:00
if err != nil {
2020-09-06 16:54:00 +00:00
return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.Errorf("allocate local sector for fetching: %w", err)
2020-08-04 14:20:59 +00:00
}
2020-09-06 16:54:00 +00:00
odt := storiface.FSOverheadSeal
if pathType == storiface.PathStorage {
odt = storiface.FsOverheadFinalized
2020-08-04 14:20:59 +00:00
}
releaseStorage, err := r.local.Reserve(ctx, s, toFetch, ids, odt)
2020-08-04 14:20:59 +00:00
if err != nil {
2020-09-06 16:54:00 +00:00
return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.Errorf("reserving storage space: %w", err)
2020-08-04 14:20:59 +00:00
}
defer releaseStorage()
2020-09-06 16:54:00 +00:00
for _, fileType := range storiface.PathTypes {
2020-03-23 11:40:02 +00:00
if fileType&existing == 0 {
continue
}
2020-09-06 16:54:00 +00:00
if storiface.PathByType(paths, fileType) != "" {
2020-03-23 11:40:02 +00:00
continue
}
2020-09-06 16:54:00 +00:00
dest := storiface.PathByType(apaths, fileType)
storageID := storiface.PathByType(ids, fileType)
2020-08-04 14:20:59 +00:00
url, err := r.acquireFromRemote(ctx, s.ID, fileType, dest)
2020-03-23 11:40:02 +00:00
if err != nil {
2020-09-06 16:54:00 +00:00
return storiface.SectorPaths{}, storiface.SectorPaths{}, err
2020-03-23 11:40:02 +00:00
}
2020-09-06 16:54:00 +00:00
storiface.SetPathByType(&paths, fileType, dest)
storiface.SetPathByType(&stores, fileType, storageID)
2020-03-23 11:40:02 +00:00
if err := r.index.StorageDeclareSector(ctx, ID(storageID), s.ID, fileType, op == storiface.AcquireMove); err != nil {
2020-03-23 11:40:02 +00:00
log.Warnf("declaring sector %v in %s failed: %+v", s, storageID, err)
continue
}
2020-09-06 16:54:00 +00:00
if op == storiface.AcquireMove {
if err := r.deleteFromRemote(ctx, url); err != nil {
log.Warnf("deleting sector %v from %s (delete %s): %+v", s, storageID, url, err)
}
2020-03-23 11:40:02 +00:00
}
}
2020-06-04 19:00:16 +00:00
return paths, stores, nil
2020-03-23 11:40:02 +00:00
}
2020-08-04 14:20:59 +00:00
func tempFetchDest(spath string, create bool) (string, error) {
2020-07-24 17:39:25 +00:00
st, b := filepath.Split(spath)
tempdir := filepath.Join(st, FetchTempSubdir)
2020-08-04 14:20:59 +00:00
if create {
2020-08-16 10:40:35 +00:00
if err := os.MkdirAll(tempdir, 0755); err != nil { // nolint
2020-08-04 14:20:59 +00:00
return "", xerrors.Errorf("creating temp fetch dir: %w", err)
}
2020-07-24 17:39:25 +00:00
}
return filepath.Join(tempdir, b), nil
}
2020-09-06 16:54:00 +00:00
func (r *Remote) acquireFromRemote(ctx context.Context, s abi.SectorID, fileType storiface.SectorFileType, dest string) (string, error) {
2020-08-11 07:27:03 +00:00
si, err := r.index.StorageFindSector(ctx, s, fileType, 0, false)
2020-03-23 11:40:02 +00:00
if err != nil {
2020-08-04 14:20:59 +00:00
return "", err
2020-03-23 11:40:02 +00:00
}
if len(si) == 0 {
2020-08-04 14:20:59 +00:00
return "", xerrors.Errorf("failed to acquire sector %v from remote(%d): %w", s, fileType, storiface.ErrSectorNotFound)
}
2020-05-01 18:04:21 +00:00
sort.Slice(si, func(i, j int) bool {
2020-03-23 11:40:02 +00:00
return si[i].Weight < si[j].Weight
})
var merr error
for _, info := range si {
// TODO: see what we have local, prefer that
2020-03-23 11:40:02 +00:00
for _, url := range info.URLs {
2020-08-04 14:20:59 +00:00
tempDest, err := tempFetchDest(dest, true)
2020-07-24 17:39:25 +00:00
if err != nil {
2020-08-04 14:20:59 +00:00
return "", err
2020-07-24 17:39:25 +00:00
}
if err := os.RemoveAll(dest); err != nil {
2020-08-04 14:20:59 +00:00
return "", xerrors.Errorf("removing dest: %w", err)
2020-07-24 17:39:25 +00:00
}
2020-07-24 14:54:00 +00:00
2020-07-24 17:39:25 +00:00
err = r.fetch(ctx, url, tempDest)
2020-03-23 11:40:02 +00:00
if err != nil {
2020-07-24 14:54:00 +00:00
merr = multierror.Append(merr, xerrors.Errorf("fetch error %s (storage %s) -> %s: %w", url, info.ID, tempDest, err))
2020-03-23 11:40:02 +00:00
continue
}
2020-07-24 14:54:00 +00:00
if err := move(tempDest, dest); err != nil {
2020-08-04 14:20:59 +00:00
return "", xerrors.Errorf("fetch move error (storage %s) %s -> %s: %w", info.ID, tempDest, dest, err)
2020-07-24 14:54:00 +00:00
}
2020-03-23 11:40:02 +00:00
if merr != nil {
log.Warnw("acquireFromRemote encountered errors when fetching sector from remote", "errors", merr)
}
2020-08-04 14:20:59 +00:00
return url, nil
2020-03-23 11:40:02 +00:00
}
}
2020-08-04 14:20:59 +00:00
return "", xerrors.Errorf("failed to acquire sector %v from remote (tried %v): %w", s, si, merr)
2020-03-23 11:40:02 +00:00
}
func (r *Remote) fetch(ctx context.Context, url, outname string) error {
2020-03-23 11:40:02 +00:00
log.Infof("Fetch %s -> %s", url, outname)
2020-07-24 14:43:41 +00:00
if len(r.limit) >= cap(r.limit) {
log.Infof("Throttling fetch, %d already running", len(r.limit))
}
// TODO: Smarter throttling
// * Priority (just going sequentially is still pretty good)
// * Per interface
// * Aware of remote load
select {
case r.limit <- struct{}{}:
defer func() { <-r.limit }()
case <-ctx.Done():
return xerrors.Errorf("context error while waiting for fetch limiter: %w", ctx.Err())
}
2020-03-23 11:40:02 +00:00
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return xerrors.Errorf("request: %w", err)
}
req.Header = r.auth
req = req.WithContext(ctx)
2020-03-23 11:40:02 +00:00
resp, err := http.DefaultClient.Do(req)
if err != nil {
return xerrors.Errorf("do request: %w", err)
}
2020-08-16 10:40:35 +00:00
defer resp.Body.Close() // nolint
2020-03-23 11:40:02 +00:00
if resp.StatusCode != 200 {
return xerrors.Errorf("non-200 code: %d", resp.StatusCode)
}
/*bar := pb.New64(w.sizeForType(typ))
bar.ShowPercent = true
bar.ShowSpeed = true
bar.Units = pb.U_BYTES
barreader := bar.NewProxyReader(resp.Body)
bar.Start()
defer bar.Finish()*/
mediatype, _, err := mime.ParseMediaType(resp.Header.Get("Content-Type"))
if err != nil {
return xerrors.Errorf("parse media type: %w", err)
}
if err := os.RemoveAll(outname); err != nil {
return xerrors.Errorf("removing dest: %w", err)
}
switch mediatype {
case "application/x-tar":
return tarutil.ExtractTar(resp.Body, outname)
case "application/octet-stream":
return files.WriteTo(files.NewReaderFile(resp.Body), outname)
default:
return xerrors.Errorf("unknown content type: '%s'", mediatype)
}
}
// MoveStorage acquires the given file types of sector s with
// storiface.PathStorage and storiface.AcquireMove, then delegates the actual
// move to the local store.
func (r *Remote) MoveStorage(ctx context.Context, s storage.SectorRef, types storiface.SectorFileType) error {
	// Make sure we have the data local
	if _, _, err := r.AcquireSector(ctx, s, types, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove); err != nil {
		return xerrors.Errorf("acquire src storage (remote): %w", err)
	}

	return r.local.MoveStorage(ctx, s, types)
}
2020-09-06 16:54:00 +00:00
func (r *Remote) Remove(ctx context.Context, sid abi.SectorID, typ storiface.SectorFileType, force bool) error {
if bits.OnesCount(uint(typ)) != 1 {
return xerrors.New("delete expects one file type")
}
2020-05-13 18:45:14 +00:00
if err := r.local.Remove(ctx, sid, typ, force); err != nil {
return xerrors.Errorf("remove from local: %w", err)
}
2020-08-11 07:27:03 +00:00
si, err := r.index.StorageFindSector(ctx, sid, typ, 0, false)
if err != nil {
return xerrors.Errorf("finding existing sector %d(t:%d) failed: %w", sid, typ, err)
}
for _, info := range si {
for _, url := range info.URLs {
if err := r.deleteFromRemote(ctx, url); err != nil {
log.Warnf("remove %s: %+v", url, err)
continue
}
break
}
}
return nil
}
func (r *Remote) deleteFromRemote(ctx context.Context, url string) error {
2020-03-23 11:40:02 +00:00
log.Infof("Delete %s", url)
req, err := http.NewRequest("DELETE", url, nil)
if err != nil {
return xerrors.Errorf("request: %w", err)
}
req.Header = r.auth
req = req.WithContext(ctx)
2020-03-23 11:40:02 +00:00
resp, err := http.DefaultClient.Do(req)
if err != nil {
return xerrors.Errorf("do request: %w", err)
}
2020-08-16 10:40:35 +00:00
defer resp.Body.Close() // nolint
2020-03-23 11:40:02 +00:00
if resp.StatusCode != 200 {
return xerrors.Errorf("non-200 code: %d", resp.StatusCode)
}
return nil
}
2020-07-08 14:58:09 +00:00
func (r *Remote) FsStat(ctx context.Context, id ID) (fsutil.FsStat, error) {
st, err := r.local.FsStat(ctx, id)
switch err {
case nil:
return st, nil
case errPathNotFound:
break
default:
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("local stat: %w", err)
}
si, err := r.index.StorageInfo(ctx, id)
if err != nil {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("getting remote storage info: %w", err)
}
if len(si.URLs) == 0 {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("no known URLs for remote storage %s", id)
}
rl, err := url.Parse(si.URLs[0])
if err != nil {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("failed to parse url: %w", err)
}
rl.Path = gopath.Join(rl.Path, "stat", string(id))
req, err := http.NewRequest("GET", rl.String(), nil)
if err != nil {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("request: %w", err)
}
req.Header = r.auth
req = req.WithContext(ctx)
resp, err := http.DefaultClient.Do(req)
if err != nil {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("do request: %w", err)
}
switch resp.StatusCode {
case 200:
break
case 404:
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, errPathNotFound
case 500:
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("fsstat: got http 500, then failed to read the error: %w", err)
}
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("fsstat: got http 500: %s", string(b))
}
2020-07-08 14:58:09 +00:00
var out fsutil.FsStat
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
2020-07-08 14:58:09 +00:00
return fsutil.FsStat{}, xerrors.Errorf("decoding fsstat: %w", err)
}
2020-08-16 10:40:35 +00:00
defer resp.Body.Close() // nolint
return out, nil
}
2020-03-23 11:40:02 +00:00
// Compile-time check that *Remote is assignable to Store.
var _ Store = &Remote{}