lotus/lib/rpcenc/reader.go

487 lines
12 KiB
Go
Raw Permalink Normal View History

2020-08-14 14:06:53 +00:00
package rpcenc
import (
"context"
"encoding/json"
"errors"
2020-08-14 14:06:53 +00:00
"fmt"
"io"
"net/http"
"net/url"
"path"
"reflect"
"strconv"
"sync"
2020-08-14 21:12:37 +00:00
"time"
2020-08-14 14:06:53 +00:00
"github.com/google/uuid"
logging "github.com/ipfs/go-log/v2"
"golang.org/x/xerrors"
"github.com/filecoin-project/go-jsonrpc"
2020-09-07 03:49:10 +00:00
"github.com/filecoin-project/go-state-types/abi"
2022-05-24 14:22:52 +00:00
"github.com/filecoin-project/lotus/lib/httpreader"
"github.com/filecoin-project/lotus/storage/pipeline/lib/nullreader"
2020-08-14 14:06:53 +00:00
)
2020-08-14 21:12:37 +00:00
var (
	// log is the logger used throughout the rpcenc package.
	log = logging.Logger("rpcenc")

	// Timeout bounds how long the push HTTP handler waits to hand a reader
	// over to the parameter decoder, and how long the decoder waits to
	// receive one.
	Timeout = 30 * time.Second
)
2020-08-14 14:06:53 +00:00
// StreamType names the way an io.Reader parameter is transported between the
// rpc client and the rpc server. It is the value of ReaderStream.Type.
type StreamType string

const (
	// Null marks a reader that is not transferred at all; only its length is
	// sent and the server re-creates it locally.
	Null StreamType = "null"
	// PushStream marks a reader whose data is pushed by the client to the
	// server's push endpoint, identified by a UUID.
	PushStream StreamType = "push"
	// HTTP marks a reader that is described by a URL.
	HTTP StreamType = "http"

	// TODO: Data transfer handoff to workers?
)
// ReaderStream is the JSON-serializable stand-in that is sent over jsonrpc in
// place of an io.Reader parameter.
type ReaderStream struct {
	// Type selects the transport used for the reader's data.
	Type StreamType
	// Info carries the transport-specific payload: the byte count for Null,
	// the request UUID for PushStream, or the URL for HTTP.
	Info string
}
2021-07-30 10:58:28 +00:00
// client is the HTTP client used for push requests. It starts as a shallow
// copy of http.DefaultClient (which is left untouched) and never follows
// redirects on its own: a redirect response is returned to the caller as-is so
// that the caller can decide where to go next.
var client = func() *http.Client {
	noFollow := func(*http.Request, []*http.Request) error {
		return http.ErrUseLastResponse
	}

	cl := new(http.Client)
	*cl = *http.DefaultClient
	cl.CheckRedirect = noFollow
	return cl
}()
/*
Example rpc function:
Push(context.Context, io.Reader) error
Request flow:
1. Client invokes a method with an io.Reader param
2021-07-30 10:58:28 +00:00
2. go-jsonrpc invokes `ReaderParamEncoder` for the client-provided io.Reader
3. `ReaderParamEncoder` transforms the reader into a `ReaderStream` which can
be serialized as JSON, and sent as jsonrpc request parameter
3.1. If the reader is of type `*nullreader.NullReader`, the resulting object
is `ReaderStream{ Type: "null", Info: "[base 10 number of bytes]" }`
3.2. If the reader is of type `*RpcReader`, and it wasn't read from, we
notify that RpcReader to go to a different push endpoint, and return
a `ReaderStream` object like in 3.4.
3.3. In remaining cases we start a goroutine which:
3.3.1. Makes a HEAD request to the server push endpoint
3.3.2. If the HEAD request is redirected, it follows the redirect
3.3.3. If the request succeeds, it starts a POST request to the
endpoint to which the last HEAD request was sent with the
reader set as request body.
3.4. We return a `ReaderStream` indicating the uuid of push request, ex:
`ReaderStream{ Type: "push", Info: "[UUID string]" }`
4. If the reader wasn't a NullReader, the server will receive a HEAD (or
POST in case of older clients) request to the push endpoint.
4.1. The server gets or registers an `*RpcReader` in the `readers` map.
2021-07-30 10:58:28 +00:00
4.2. It waits for a request to a matching push endpoint to be opened
4.3. After the request is opened, it returns the `*RpcReader` to
2021-07-30 10:58:28 +00:00
go-jsonrpc, which will pass it as the io.Reader parameter to the
rpc method implementation
4.4. If the first request made to the push endpoint was a POST, the
returned `*RpcReader` acts as a simple reader reading the POST
2021-07-30 10:58:28 +00:00
request body
4.5. If the first request made to the push endpoint was a HEAD
4.5.1. On the first call to Read or Close the server responds with
a 200 OK header, the client starts a POST request to the same
push URL, and the reader starts passing through the POST request
body
4.5.2. If the reader is passed to another (now client) RPC method as a
reader parameter, the server for the first request responds to the
HEAD request with http 302 Found, instructing the first client to
go to the push endpoint of the second RPC server
5. If the reader was a NullReader (ReaderStream.Type=="null"), we instantiate
it, and provide it to the method implementation
*/
2020-08-14 14:06:53 +00:00
func ReaderParamEncoder(addr string) jsonrpc.Option {
2021-07-30 10:58:28 +00:00
// Client side parameter encoder. Runs on the rpc client side. io.Reader -> ReaderStream{}
2020-08-14 14:06:53 +00:00
return jsonrpc.WithParamEncoder(new(io.Reader), func(value reflect.Value) (reflect.Value, error) {
r := value.Interface().(io.Reader)
if r, ok := r.(*nullreader.NullReader); ok {
2020-08-14 14:06:53 +00:00
return reflect.ValueOf(ReaderStream{Type: Null, Info: fmt.Sprint(r.N)}), nil
}
if r, ok := r.(*httpreader.HttpReader); ok && r.URL != "" {
2022-05-24 14:22:52 +00:00
return reflect.ValueOf(ReaderStream{Type: HTTP, Info: r.URL}), nil
}
2020-08-14 14:06:53 +00:00
reqID := uuid.New()
u, err := url.Parse(addr)
if err != nil {
return reflect.Value{}, xerrors.Errorf("parsing push address: %w", err)
}
2020-08-14 14:06:53 +00:00
u.Path = path.Join(u.Path, reqID.String())
rpcReader, redir := r.(*RpcReader)
2021-07-30 10:58:28 +00:00
if redir {
// if we have an rpc stream, redirect instead of proxying all the data
redir = rpcReader.redirect(u.String())
}
2020-08-14 14:06:53 +00:00
2021-07-30 10:58:28 +00:00
if !redir {
go func() {
// TODO: figure out errors here
for {
req, err := http.NewRequest("HEAD", u.String(), nil)
if err != nil {
log.Errorf("sending HEAD request for the reder param: %+v", err)
return
}
req.Header.Set("Content-Type", "application/octet-stream")
resp, err := client.Do(req)
if err != nil {
log.Errorf("sending reader param: %+v", err)
return
}
// todo do we need to close the body for a head request?
if resp.StatusCode == http.StatusFound {
nextStr := resp.Header.Get("Location")
u, err = url.Parse(nextStr)
if err != nil {
log.Errorf("sending HEAD request for the reder param, parsing next url (%s): %+v", nextStr, err)
return
}
continue
}
if resp.StatusCode == http.StatusNoContent { // reader closed before reading anything
// todo just return??
return
}
if resp.StatusCode != http.StatusOK {
b, _ := io.ReadAll(resp.Body)
2021-07-30 10:58:28 +00:00
log.Errorf("sending reader param (%s): non-200 status: %s, msg: '%s'", u.String(), resp.Status, string(b))
return
}
break
}
// now actually send the data
req, err := http.NewRequest("POST", u.String(), r)
if err != nil {
log.Errorf("sending reader param: %+v", err)
return
}
req.Header.Set("Content-Type", "application/octet-stream")
resp, err := client.Do(req)
if err != nil {
log.Errorf("sending reader param: %+v", err)
return
}
defer resp.Body.Close() //nolint
if resp.StatusCode != http.StatusOK {
b, _ := io.ReadAll(resp.Body)
2021-07-30 10:58:28 +00:00
log.Errorf("sending reader param (%s): non-200 status: %s, msg: '%s'", u.String(), resp.Status, string(b))
return
}
}()
}
2020-08-14 14:06:53 +00:00
2021-07-30 10:58:28 +00:00
return reflect.ValueOf(ReaderStream{Type: PushStream, Info: reqID.String()}), nil
})
}
2020-08-14 14:06:53 +00:00
2021-07-30 10:58:28 +00:00
// resType is the kind of message an RpcReader sends to the push HTTP handler
// over its res channel.
type resType int

const (
	resStart    resType = iota // send on first read after HEAD
	resRedirect                // send on redirect before first read after HEAD
	resError                   // the reader can't be used; the handler answers with HTTP 500
	// done/closed = close res channel
)
2020-08-14 14:06:53 +00:00
2021-07-30 10:58:28 +00:00
type readRes struct {
rt resType
meta string
2020-08-14 14:06:53 +00:00
}
// RpcReader watches the ReadCloser and closes the res channel when
2021-07-20 13:21:09 +00:00
// either: (1) the ReaderCloser fails on Read (including with a benign error
// like EOF), or (2) when Close is called.
//
// Use it be notified of terminal states, in situations where a Read failure (or
// EOF) is considered a terminal state too (besides Close).
type RpcReader struct {
postBody io.ReadCloser // nil on initial head request
next chan *RpcReader // on head will get us the postBody after sending resStart
mustRedirect bool
2022-04-28 09:50:51 +00:00
eof bool
2021-07-30 10:58:28 +00:00
res chan readRes
beginOnce *sync.Once
closeOnce *sync.Once
2020-08-14 14:06:53 +00:00
}
var (
	// ErrHasBody is returned by MustRedirect when the reader already has a
	// POST body attached and therefore can no longer be redirected.
	ErrHasBody = errors.New("RPCReader has body, either already read from or from a client with no redirect support")

	// ErrMustRedirect is returned by Read on a reader that was marked with
	// MustRedirect.
	ErrMustRedirect = errors.New("reader can't be read directly; marked as MustRedirect")
)
// MustRedirect flags the reader so that it can only be consumed by being
// redirected; local Read calls will fail afterwards. It MUST be called before
// the reader is handed to any goroutine.
//
// When the reader already carries a POST body it cannot be redirected: in that
// case a resError message is sent to the push handler, the res channel is
// closed (at most once), and ErrHasBody is returned.
func (w *RpcReader) MustRedirect() error {
	if w.postBody == nil {
		w.mustRedirect = true
		return nil
	}

	failAndClose := func() {
		w.res <- readRes{rt: resError}
		close(w.res)
	}
	w.closeOnce.Do(failAndClose)

	return ErrHasBody
}
func (w *RpcReader) beginPost() {
if w.mustRedirect {
w.res <- readRes{
rt: resError,
}
close(w.res)
return
}
2021-07-30 10:58:28 +00:00
if w.postBody == nil {
w.res <- readRes{
rt: resStart,
}
nr := <-w.next
w.postBody = nr.postBody
w.res = nr.res
w.beginOnce = nr.beginOnce
w.closeOnce = nr.closeOnce
2021-07-30 10:58:28 +00:00
}
}
func (w *RpcReader) Read(p []byte) (int, error) {
2021-07-30 10:58:28 +00:00
w.beginOnce.Do(func() {
w.beginPost()
})
2022-04-28 09:50:51 +00:00
if w.eof {
return 0, io.EOF
}
if w.mustRedirect {
return 0, ErrMustRedirect
}
2021-07-30 10:58:28 +00:00
if w.postBody == nil {
return 0, xerrors.Errorf("reader already closed, redirected or cancelled")
2021-07-30 10:58:28 +00:00
}
n, err := w.postBody.Read(p)
2020-08-14 14:06:53 +00:00
if err != nil {
2022-04-28 09:50:51 +00:00
if err == io.EOF {
w.eof = true
}
w.closeOnce.Do(func() {
2021-07-30 10:58:28 +00:00
close(w.res)
})
2020-08-14 14:06:53 +00:00
}
return n, err
}
func (w *RpcReader) Close() error {
w.beginOnce.Do(func() {})
w.closeOnce.Do(func() {
2021-07-30 10:58:28 +00:00
close(w.res)
})
if w.postBody == nil {
return nil
}
2021-07-30 10:58:28 +00:00
return w.postBody.Close()
}
func (w *RpcReader) redirect(to string) bool {
2021-07-30 11:03:31 +00:00
if w.postBody != nil {
return false
}
2021-07-30 10:58:28 +00:00
done := false
w.beginOnce.Do(func() {
w.closeOnce.Do(func() {
w.res <- readRes{
rt: resRedirect,
meta: to,
}
done = true
close(w.res)
})
})
2021-07-30 10:58:28 +00:00
return done
2020-08-14 14:06:53 +00:00
}
func ReaderParamDecoder() (http.HandlerFunc, jsonrpc.ServerOption) {
var readersLk sync.Mutex
readers := map[uuid.UUID]chan *RpcReader{}
2020-08-14 14:06:53 +00:00
2021-07-30 10:58:28 +00:00
// runs on the rpc server side, called by the client before making the jsonrpc request
2020-08-14 14:06:53 +00:00
hnd := func(resp http.ResponseWriter, req *http.Request) {
strId := path.Base(req.URL.Path)
u, err := uuid.Parse(strId)
if err != nil {
http.Error(resp, fmt.Sprintf("parsing reader uuid: %s", err), 400)
2020-08-14 21:49:08 +00:00
return
2020-08-14 14:06:53 +00:00
}
readersLk.Lock()
ch, found := readers[u]
if !found {
ch = make(chan *RpcReader)
2020-08-14 14:06:53 +00:00
readers[u] = ch
}
readersLk.Unlock()
wr := &RpcReader{
2021-07-30 10:58:28 +00:00
res: make(chan readRes),
next: ch,
beginOnce: &sync.Once{},
closeOnce: &sync.Once{},
2021-07-30 10:58:28 +00:00
}
switch req.Method {
case http.MethodHead:
// leave body nil
case http.MethodPost:
wr.postBody = req.Body
default:
http.Error(resp, "unsupported method", http.StatusMethodNotAllowed)
2020-08-14 14:06:53 +00:00
}
2020-08-14 21:12:37 +00:00
tctx, cancel := context.WithTimeout(req.Context(), Timeout)
defer cancel()
2020-08-14 14:06:53 +00:00
select {
case ch <- wr:
2020-08-14 21:12:37 +00:00
case <-tctx.Done():
close(ch)
2020-11-24 11:09:48 +00:00
log.Errorf("context error in reader stream handler (1): %v", tctx.Err())
2020-08-14 14:06:53 +00:00
resp.WriteHeader(500)
return
}
select {
2021-07-30 10:58:28 +00:00
case res, ok := <-wr.res:
if !ok {
if req.Method == http.MethodHead {
resp.WriteHeader(http.StatusNoContent)
} else {
resp.WriteHeader(http.StatusOK)
}
return
}
2021-07-20 13:21:09 +00:00
// TODO should we check if we failed the Read, and if so
2021-07-30 10:58:28 +00:00
// return an HTTP 500? i.e. turn res into a chan error?
switch res.rt {
case resRedirect:
http.Redirect(resp, req, res.meta, http.StatusFound)
case resStart: // responding to HEAD, request POST with reader data
resp.WriteHeader(http.StatusOK)
case resError:
resp.WriteHeader(500)
2021-07-30 10:58:28 +00:00
default:
log.Errorf("unknown res.rt")
resp.WriteHeader(500)
}
return
2020-08-14 14:06:53 +00:00
case <-req.Context().Done():
2020-11-24 11:09:48 +00:00
log.Errorf("context error in reader stream handler (2): %v", req.Context().Err())
closed := make(chan struct{})
// start a draining goroutine
go func() {
for {
select {
case r, ok := <-wr.res:
if !ok {
return
}
log.Errorw("discarding read res", "type", r.rt, "meta", r.meta)
case <-closed:
return
}
}
}()
wr.beginOnce.Do(func() {})
wr.closeOnce.Do(func() {
close(wr.res)
})
close(closed)
2020-08-14 14:06:53 +00:00
resp.WriteHeader(500)
return
}
}
2021-07-30 10:58:28 +00:00
// Server side reader decoder. runs on the rpc server side, invoked when decoding client request parameters. json(ReaderStream{}) -> io.Reader
2020-08-14 14:06:53 +00:00
dec := jsonrpc.WithParamDecoder(new(io.Reader), func(ctx context.Context, b []byte) (reflect.Value, error) {
var rs ReaderStream
if err := json.Unmarshal(b, &rs); err != nil {
return reflect.Value{}, xerrors.Errorf("unmarshaling reader id: %w", err)
}
switch rs.Type {
case Null:
2020-08-14 14:06:53 +00:00
n, err := strconv.ParseInt(rs.Info, 10, 64)
if err != nil {
return reflect.Value{}, xerrors.Errorf("parsing null byte count: %w", err)
}
return reflect.ValueOf(nullreader.NewNullReader(abi.UnpaddedPieceSize(n))), nil
2022-05-24 14:22:52 +00:00
case HTTP:
return reflect.ValueOf(&httpreader.HttpReader{URL: rs.Info}), nil
2020-08-14 14:06:53 +00:00
}
u, err := uuid.Parse(rs.Info)
if err != nil {
return reflect.Value{}, xerrors.Errorf("parsing reader UUDD: %w", err)
}
readersLk.Lock()
ch, found := readers[u]
if !found {
ch = make(chan *RpcReader)
2020-08-14 14:06:53 +00:00
readers[u] = ch
}
readersLk.Unlock()
2020-08-14 21:12:37 +00:00
ctx, cancel := context.WithTimeout(ctx, Timeout)
defer cancel()
2020-08-14 14:06:53 +00:00
select {
2020-08-14 21:12:37 +00:00
case wr, ok := <-ch:
if !ok {
return reflect.Value{}, xerrors.Errorf("handler timed out")
}
2020-08-14 14:06:53 +00:00
return reflect.ValueOf(wr), nil
case <-ctx.Done():
return reflect.Value{}, ctx.Err()
}
})
return hnd, dec
}