forked from cerc-io/ipld-eth-server
492 lines
15 KiB
Go
492 lines
15 KiB
Go
package io
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
|
|
ipld "github.com/ipfs/go-ipld-format"
|
|
mdag "github.com/ipfs/go-merkledag"
|
|
unixfs "github.com/ipfs/go-unixfs"
|
|
)
|
|
|
|
// Sentinel errors reported by NewDagReader when a node cannot be opened
// as a readable file.
var (
	// ErrIsDir is returned for UnixFS directory and HAMT shard nodes.
	ErrIsDir = errors.New("this dag node is a directory")

	// ErrCantReadSymlinks is returned for UnixFS symlink nodes.
	ErrCantReadSymlinks = errors.New("cannot currently read symlinks")

	// ErrUnkownNodeType is returned when the node is neither a raw node
	// nor a protobuf node. The misspelt identifier is part of the exported
	// API and is kept as is.
	ErrUnkownNodeType = errors.New("unknown node type")
)
|
|
|
|
// TODO: Rename the `DagReader` interface, this doesn't read *any* DAG, just
|
|
// DAGs with UnixFS node (and it *belongs* to the `unixfs` package). Some
|
|
// alternatives: `FileReader`, `UnixFSFileReader`, `UnixFSReader`.
|
|
|
|
// A DagReader provides read-only read and seek acess to a unixfs file.
|
|
// Different implementations of readers are used for the different
|
|
// types of unixfs/protobuf-encoded nodes.
|
|
type DagReader interface {
|
|
ReadSeekCloser
|
|
Size() uint64
|
|
CtxReadFull(context.Context, []byte) (int, error)
|
|
}
|
|
|
|
// ReadSeekCloser groups the standard interfaces needed to read, seek,
// copy out (io.WriterTo) and close a stream.
type ReadSeekCloser interface {
	io.ReadSeeker
	io.WriterTo
	io.Closer
}
|
|
|
|
// NewDagReader creates a new reader object that reads the data represented by
|
|
// the given node, using the passed in DAGService for data retrieval.
|
|
func NewDagReader(ctx context.Context, n ipld.Node, serv ipld.NodeGetter) (DagReader, error) {
|
|
var size uint64
|
|
|
|
switch n := n.(type) {
|
|
case *mdag.RawNode:
|
|
size = uint64(len(n.RawData()))
|
|
|
|
case *mdag.ProtoNode:
|
|
fsNode, err := unixfs.FSNodeFromBytes(n.Data())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
switch fsNode.Type() {
|
|
case unixfs.TFile, unixfs.TRaw:
|
|
size = fsNode.FileSize()
|
|
|
|
case unixfs.TDirectory, unixfs.THAMTShard:
|
|
// Dont allow reading directories
|
|
return nil, ErrIsDir
|
|
|
|
case unixfs.TMetadata:
|
|
if len(n.Links()) == 0 {
|
|
return nil, errors.New("incorrectly formatted metadata object")
|
|
}
|
|
child, err := n.Links()[0].GetNode(ctx, serv)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
childpb, ok := child.(*mdag.ProtoNode)
|
|
if !ok {
|
|
return nil, mdag.ErrNotProtobuf
|
|
}
|
|
return NewDagReader(ctx, childpb, serv)
|
|
case unixfs.TSymlink:
|
|
return nil, ErrCantReadSymlinks
|
|
default:
|
|
return nil, unixfs.ErrUnrecognizedType
|
|
}
|
|
default:
|
|
return nil, ErrUnkownNodeType
|
|
}
|
|
|
|
ctxWithCancel, cancel := context.WithCancel(ctx)
|
|
|
|
return &dagReader{
|
|
ctx: ctxWithCancel,
|
|
cancel: cancel,
|
|
serv: serv,
|
|
size: size,
|
|
rootNode: n,
|
|
dagWalker: ipld.NewWalker(ctxWithCancel, ipld.NewNavigableIPLDNode(n, serv)),
|
|
}, nil
|
|
}
|
|
|
|
// dagReader provides a way to easily read the data contained in a dag.
|
|
type dagReader struct {
|
|
|
|
// Structure to perform the DAG iteration and search, the reader
|
|
// just needs to add logic to the `Visitor` callback passed to
|
|
// `Iterate` and `Seek`.
|
|
dagWalker *ipld.Walker
|
|
|
|
// Buffer with the data extracted from the current node being visited.
|
|
// To avoid revisiting a node to complete a (potential) partial read
|
|
// (or read after seek) the node's data is fully extracted in a single
|
|
// `readNodeDataBuffer` operation.
|
|
currentNodeData *bytes.Reader
|
|
|
|
// Implements the `Size()` API.
|
|
size uint64
|
|
|
|
// Current offset for the read head within the DAG file.
|
|
offset int64
|
|
|
|
// Root node of the DAG, stored to re-create the `dagWalker` (effectively
|
|
// re-setting the position of the reader, used during `Seek`).
|
|
rootNode ipld.Node
|
|
|
|
// Context passed to the `dagWalker`, the `cancel` function is used to
|
|
// cancel read operations (cancelling requested child node promises,
|
|
// see `ipld.NavigableIPLDNode.FetchChild` for details).
|
|
ctx context.Context
|
|
cancel func()
|
|
|
|
// Passed to the `dagWalker` that will use it to request nodes.
|
|
// TODO: Revisit name.
|
|
serv ipld.NodeGetter
|
|
}
|
|
|
|
// Size returns the total size of the data from the DAG structured file.
|
|
func (dr *dagReader) Size() uint64 {
|
|
return dr.size
|
|
}
|
|
|
|
// Read implements the `io.Reader` interface through the `CtxReadFull`
|
|
// method using the DAG reader's internal context.
|
|
func (dr *dagReader) Read(b []byte) (int, error) {
|
|
return dr.CtxReadFull(dr.ctx, b)
|
|
}
|
|
|
|
// CtxReadFull reads data from the DAG structured file. It always
|
|
// attempts a full read of the DAG until the `out` buffer is full.
|
|
// It uses the `Walker` structure to iterate the file DAG and read
|
|
// every node's data into the `out` buffer.
|
|
func (dr *dagReader) CtxReadFull(ctx context.Context, out []byte) (n int, err error) {
|
|
// Set the `dagWalker`'s context to the `ctx` argument, it will be used
|
|
// to fetch the child node promises (see
|
|
// `ipld.NavigableIPLDNode.FetchChild` for details).
|
|
dr.dagWalker.SetContext(ctx)
|
|
|
|
// If there was a partially read buffer from the last visited
|
|
// node read it before visiting a new one.
|
|
if dr.currentNodeData != nil {
|
|
// TODO: Move this check inside `readNodeDataBuffer`?
|
|
n = dr.readNodeDataBuffer(out)
|
|
|
|
if n == len(out) {
|
|
return n, nil
|
|
// Output buffer full, no need to traverse the DAG.
|
|
}
|
|
}
|
|
|
|
// Iterate the DAG calling the passed `Visitor` function on every node
|
|
// to read its data into the `out` buffer, stop if there is an error or
|
|
// if the entire DAG is traversed (`EndOfDag`).
|
|
err = dr.dagWalker.Iterate(func(visitedNode ipld.NavigableNode) error {
|
|
node := ipld.ExtractIPLDNode(visitedNode)
|
|
|
|
// Skip internal nodes, they shouldn't have any file data
|
|
// (see the `balanced` package for more details).
|
|
if len(node.Links()) > 0 {
|
|
return nil
|
|
}
|
|
|
|
err = dr.saveNodeData(node)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Save the leaf node file data in a buffer in case it is only
|
|
// partially read now and future `CtxReadFull` calls reclaim the
|
|
// rest (as each node is visited only once during `Iterate`).
|
|
//
|
|
// TODO: We could check if the entire node's data can fit in the
|
|
// remaining `out` buffer free space to skip this intermediary step.
|
|
|
|
n += dr.readNodeDataBuffer(out[n:])
|
|
|
|
if n == len(out) {
|
|
// Output buffer full, no need to keep traversing the DAG,
|
|
// signal the `Walker` to pause the iteration.
|
|
dr.dagWalker.Pause()
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
if err == ipld.EndOfDag {
|
|
return n, io.EOF
|
|
// Reached the end of the (DAG) file, no more data to read.
|
|
} else if err != nil {
|
|
return n, err
|
|
// Pass along any other errors from the `Visitor`.
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
// Save the UnixFS `node`'s data into the internal `currentNodeData` buffer to
|
|
// later move it to the output buffer (`Read`) or seek into it (`Seek`).
|
|
func (dr *dagReader) saveNodeData(node ipld.Node) error {
|
|
extractedNodeData, err := unixfs.ReadUnixFSNodeData(node)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
dr.currentNodeData = bytes.NewReader(extractedNodeData)
|
|
return nil
|
|
}
|
|
|
|
// Read the `currentNodeData` buffer into `out`. This function can't have
|
|
// any errors as it's always reading from a `bytes.Reader` and asking only
|
|
// the available data in it.
|
|
func (dr *dagReader) readNodeDataBuffer(out []byte) int {
|
|
|
|
n, _ := dr.currentNodeData.Read(out)
|
|
// Ignore the error as the EOF may not be returned in the first
|
|
// `Read` call, explicitly ask for an empty buffer below to check
|
|
// if we've reached the end.
|
|
|
|
if dr.currentNodeData.Len() == 0 {
|
|
dr.currentNodeData = nil
|
|
// Signal that the buffer was consumed (for later `Read` calls).
|
|
// This shouldn't return an EOF error as it's just the end of a
|
|
// single node's data, not the entire DAG.
|
|
}
|
|
|
|
dr.offset += int64(n)
|
|
// TODO: Should `offset` be incremented here or in the calling function?
|
|
// (Doing it here saves LoC but may be confusing as it's more hidden).
|
|
|
|
return n
|
|
}
|
|
|
|
// Similar to `readNodeDataBuffer` but it writes the contents to
|
|
// an `io.Writer` argument.
|
|
//
|
|
// TODO: Check what part of the logic between the two functions
|
|
// can be extracted away.
|
|
func (dr *dagReader) writeNodeDataBuffer(w io.Writer) (int64, error) {
|
|
|
|
n, err := dr.currentNodeData.WriteTo(w)
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
|
|
if dr.currentNodeData.Len() == 0 {
|
|
dr.currentNodeData = nil
|
|
// Signal that the buffer was consumed (for later `Read` calls).
|
|
// This shouldn't return an EOF error as it's just the end of a
|
|
// single node's data, not the entire DAG.
|
|
}
|
|
|
|
dr.offset += int64(n)
|
|
return n, nil
|
|
}
|
|
|
|
// WriteTo writes to the given writer.
|
|
// This follows the `bytes.Reader.WriteTo` implementation
|
|
// where it starts from the internal index that may have
|
|
// been modified by other `Read` calls.
|
|
//
|
|
// TODO: This implementation is very similar to `CtxReadFull`,
|
|
// the common parts should be abstracted away.
|
|
func (dr *dagReader) WriteTo(w io.Writer) (n int64, err error) {
|
|
// Use the internal reader's context to fetch the child node promises
|
|
// (see `ipld.NavigableIPLDNode.FetchChild` for details).
|
|
dr.dagWalker.SetContext(dr.ctx)
|
|
|
|
// If there was a partially read buffer from the last visited
|
|
// node read it before visiting a new one.
|
|
if dr.currentNodeData != nil {
|
|
n, err = dr.writeNodeDataBuffer(w)
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
}
|
|
|
|
// Iterate the DAG calling the passed `Visitor` function on every node
|
|
// to read its data into the `out` buffer, stop if there is an error or
|
|
// if the entire DAG is traversed (`EndOfDag`).
|
|
err = dr.dagWalker.Iterate(func(visitedNode ipld.NavigableNode) error {
|
|
node := ipld.ExtractIPLDNode(visitedNode)
|
|
|
|
// Skip internal nodes, they shouldn't have any file data
|
|
// (see the `balanced` package for more details).
|
|
if len(node.Links()) > 0 {
|
|
return nil
|
|
}
|
|
|
|
err = dr.saveNodeData(node)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Save the leaf node file data in a buffer in case it is only
|
|
// partially read now and future `CtxReadFull` calls reclaim the
|
|
// rest (as each node is visited only once during `Iterate`).
|
|
|
|
written, err := dr.writeNodeDataBuffer(w)
|
|
n += written
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
if err == ipld.EndOfDag {
|
|
return n, nil
|
|
}
|
|
|
|
return n, err
|
|
}
|
|
|
|
// Close the reader (cancelling fetch node operations requested with
|
|
// the internal context, that is, `Read` calls but not `CtxReadFull`
|
|
// with user-supplied contexts).
|
|
func (dr *dagReader) Close() error {
|
|
dr.cancel()
|
|
return nil
|
|
}
|
|
|
|
// Seek implements `io.Seeker` seeking to a given offset in the DAG file,
|
|
// it matches the standard unix `seek`. It moves the position of the internal
|
|
// `dagWalker` and may also leave a `currentNodeData` buffer loaded in case
|
|
// the seek is performed to the middle of the data in a node.
|
|
//
|
|
// TODO: Support seeking from the current position (relative seek)
|
|
// through the `dagWalker` in `io.SeekCurrent`.
|
|
func (dr *dagReader) Seek(offset int64, whence int) (int64, error) {
|
|
switch whence {
|
|
case io.SeekStart:
|
|
if offset < 0 {
|
|
return -1, errors.New("invalid offset")
|
|
}
|
|
|
|
if offset == dr.offset {
|
|
return offset, nil
|
|
// Already at the requested `offset`, nothing to do.
|
|
}
|
|
|
|
left := offset
|
|
// Amount left to seek.
|
|
|
|
// Seek from the beginning of the DAG.
|
|
dr.resetPosition()
|
|
|
|
// Use the internal reader's context to fetch the child node promises
|
|
// (see `ipld.NavigableIPLDNode.FetchChild` for details).
|
|
dr.dagWalker.SetContext(dr.ctx)
|
|
// TODO: Performance: we could adjust here `preloadSize` of
|
|
// `ipld.NavigableIPLDNode` also, when seeking we only want
|
|
// to fetch one child at a time.
|
|
|
|
// Seek the DAG by calling the provided `Visitor` function on every
|
|
// node the `dagWalker` descends to while searching which can be
|
|
// either an internal or leaf node. In the internal node case, check
|
|
// the child node sizes and set the corresponding child index to go
|
|
// down to next. In the leaf case (last visit of the search), if there
|
|
// is still an amount `left` to seek do it inside the node's data
|
|
// saved in the `currentNodeData` buffer, leaving it ready for a `Read`
|
|
// call.
|
|
err := dr.dagWalker.Seek(func(visitedNode ipld.NavigableNode) error {
|
|
node := ipld.ExtractIPLDNode(visitedNode)
|
|
|
|
if len(node.Links()) > 0 {
|
|
// Internal node, should be a `mdag.ProtoNode` containing a
|
|
// `unixfs.FSNode` (see the `balanced` package for more details).
|
|
fsNode, err := unixfs.ExtractFSNode(node)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// If there aren't enough size hints don't seek
|
|
// (see the `io.EOF` handling error comment below).
|
|
if fsNode.NumChildren() != len(node.Links()) {
|
|
return io.EOF
|
|
}
|
|
|
|
// Internal nodes have no data, so just iterate through the
|
|
// sizes of its children (advancing the child index of the
|
|
// `dagWalker`) to find where we need to go down to next in
|
|
// the search.
|
|
for {
|
|
childSize := fsNode.BlockSize(int(dr.dagWalker.ActiveChildIndex()))
|
|
|
|
if childSize > uint64(left) {
|
|
// This child's data contains the position requested
|
|
// in `offset`, go down this child.
|
|
return nil
|
|
}
|
|
|
|
// Else, skip this child.
|
|
left -= int64(childSize)
|
|
err := dr.dagWalker.NextChild()
|
|
if err == ipld.ErrNextNoChild {
|
|
// No more child nodes available, nothing to do,
|
|
// the `Seek` will stop on its own.
|
|
return nil
|
|
} else if err != nil {
|
|
return err
|
|
// Pass along any other errors (that may in future
|
|
// implementations be returned by `Next`) to stop
|
|
// the search.
|
|
}
|
|
}
|
|
|
|
} else {
|
|
// Leaf node, seek inside its data.
|
|
err := dr.saveNodeData(node)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
_, err = dr.currentNodeData.Seek(left, io.SeekStart)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// The corner case of a DAG consisting only of a single (leaf)
|
|
// node should make no difference here. In that case, where the
|
|
// node doesn't have a parent UnixFS node with size hints, this
|
|
// implementation would allow this `Seek` to be called with an
|
|
// argument larger than the buffer size which normally wouldn't
|
|
// happen (because we would skip the node based on the size
|
|
// hint) but that would just mean that a future `CtxReadFull`
|
|
// call would read no data from the `currentNodeData` buffer.
|
|
// TODO: Re-check this reasoning.
|
|
|
|
return nil
|
|
// In the leaf node case the search will stop here.
|
|
}
|
|
})
|
|
|
|
if err == io.EOF {
|
|
// TODO: Taken from https://github.com/ipfs/go-ipfs/pull/4320,
|
|
// check if still valid.
|
|
// Return negative number if we can't figure out the file size. Using io.EOF
|
|
// for this seems to be good(-enough) solution as it's only returned by
|
|
// precalcNextBuf when we step out of file range.
|
|
// This is needed for gateway to function properly
|
|
return -1, nil
|
|
}
|
|
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
dr.offset = offset
|
|
return dr.offset, nil
|
|
|
|
case io.SeekCurrent:
|
|
if offset == 0 {
|
|
return dr.offset, nil
|
|
}
|
|
|
|
return dr.Seek(dr.offset+offset, io.SeekStart)
|
|
// TODO: Performance. This can be improved supporting relative
|
|
// searches in the `Walker` (see `Walker.Seek`).
|
|
|
|
case io.SeekEnd:
|
|
return dr.Seek(int64(dr.Size())+offset, io.SeekStart)
|
|
|
|
default:
|
|
return 0, errors.New("invalid whence")
|
|
}
|
|
}
|
|
|
|
// Reset the reader position by resetting the `dagWalker` and discarding
|
|
// any partially used node's data in the `currentNodeData` buffer, used
|
|
// in the `SeekStart` case.
|
|
func (dr *dagReader) resetPosition() {
|
|
dr.currentNodeData = nil
|
|
|
|
dr.dagWalker = ipld.NewWalker(dr.ctx, ipld.NewNavigableIPLDNode(dr.rootNode, dr.serv))
|
|
// TODO: This could be avoided (along with storing the `dr.rootNode` and
|
|
// `dr.serv` just for this call) if `Reset` is supported in the `Walker`.
|
|
}
|