forked from cerc-io/ipld-eth-server
410 lines
12 KiB
Go
410 lines
12 KiB
Go
package helpers
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"os"
|
|
|
|
dag "github.com/ipfs/go-merkledag"
|
|
|
|
ft "github.com/ipfs/go-unixfs"
|
|
pb "github.com/ipfs/go-unixfs/pb"
|
|
|
|
cid "github.com/ipfs/go-cid"
|
|
chunker "github.com/ipfs/go-ipfs-chunker"
|
|
files "github.com/ipfs/go-ipfs-files"
|
|
pi "github.com/ipfs/go-ipfs-posinfo"
|
|
ipld "github.com/ipfs/go-ipld-format"
|
|
)
|
|
|
|
var ErrMissingFsRef = errors.New("missing file path or URL, can't create filestore reference")
|
|
|
|
// DagBuilderHelper wraps together a bunch of objects needed to
|
|
// efficiently create unixfs dag trees
|
|
type DagBuilderHelper struct {
|
|
dserv ipld.DAGService
|
|
spl chunker.Splitter
|
|
recvdErr error
|
|
rawLeaves bool
|
|
nextData []byte // the next item to return.
|
|
maxlinks int
|
|
cidBuilder cid.Builder
|
|
|
|
// Filestore support variables.
|
|
// ----------------------------
|
|
// TODO: Encapsulate in `FilestoreNode` (which is basically what they are).
|
|
//
|
|
// Besides having the path this variable (if set) is used as a flag
|
|
// to indicate that Filestore should be used.
|
|
fullPath string
|
|
stat os.FileInfo
|
|
// Keeps track of the current file size added to the DAG (used in
|
|
// the balanced builder). It is assumed that the `DagBuilderHelper`
|
|
// is not reused to construct another DAG, but a new one (with a
|
|
// zero `offset`) is created.
|
|
offset uint64
|
|
}
|
|
|
|
// DagBuilderParams wraps configuration options to create a DagBuilderHelper
|
|
// from a chunker.Splitter.
|
|
type DagBuilderParams struct {
|
|
// Maximum number of links per intermediate node
|
|
Maxlinks int
|
|
|
|
// RawLeaves signifies that the importer should use raw ipld nodes as leaves
|
|
// instead of using the unixfs TRaw type
|
|
RawLeaves bool
|
|
|
|
// CID Builder to use if set
|
|
CidBuilder cid.Builder
|
|
|
|
// DAGService to write blocks to (required)
|
|
Dagserv ipld.DAGService
|
|
|
|
// NoCopy signals to the chunker that it should track fileinfo for
|
|
// filestore adds
|
|
NoCopy bool
|
|
|
|
// URL if non-empty (and NoCopy is also true) indicates that the
|
|
// file will not be stored in the datastore but instead retrieved
|
|
// from this location via the urlstore.
|
|
URL string
|
|
}
|
|
|
|
// New generates a new DagBuilderHelper from the given params and a given
|
|
// chunker.Splitter as data source.
|
|
func (dbp *DagBuilderParams) New(spl chunker.Splitter) (*DagBuilderHelper, error) {
|
|
db := &DagBuilderHelper{
|
|
dserv: dbp.Dagserv,
|
|
spl: spl,
|
|
rawLeaves: dbp.RawLeaves,
|
|
cidBuilder: dbp.CidBuilder,
|
|
maxlinks: dbp.Maxlinks,
|
|
}
|
|
if fi, ok := spl.Reader().(files.FileInfo); dbp.NoCopy && ok {
|
|
db.fullPath = fi.AbsPath()
|
|
db.stat = fi.Stat()
|
|
}
|
|
|
|
if dbp.URL != "" && dbp.NoCopy {
|
|
db.fullPath = dbp.URL
|
|
}
|
|
|
|
if dbp.NoCopy && db.fullPath == "" { // Enforce NoCopy
|
|
return nil, ErrMissingFsRef
|
|
}
|
|
|
|
return db, nil
|
|
}
|
|
|
|
// prepareNext consumes the next item from the splitter and puts it
|
|
// in the nextData field. it is idempotent-- if nextData is full
|
|
// it will do nothing.
|
|
func (db *DagBuilderHelper) prepareNext() {
|
|
// if we already have data waiting to be consumed, we're ready
|
|
if db.nextData != nil || db.recvdErr != nil {
|
|
return
|
|
}
|
|
|
|
db.nextData, db.recvdErr = db.spl.NextBytes()
|
|
if db.recvdErr == io.EOF {
|
|
db.recvdErr = nil
|
|
}
|
|
}
|
|
|
|
// Done returns whether or not we're done consuming the incoming data.
|
|
func (db *DagBuilderHelper) Done() bool {
|
|
// ensure we have an accurate perspective on data
|
|
// as `done` this may be called before `next`.
|
|
db.prepareNext() // idempotent
|
|
if db.recvdErr != nil {
|
|
return false
|
|
}
|
|
return db.nextData == nil
|
|
}
|
|
|
|
// Next returns the next chunk of data to be inserted into the dag
|
|
// if it returns nil, that signifies that the stream is at an end, and
|
|
// that the current building operation should finish.
|
|
func (db *DagBuilderHelper) Next() ([]byte, error) {
|
|
db.prepareNext() // idempotent
|
|
d := db.nextData
|
|
db.nextData = nil // signal we've consumed it
|
|
if db.recvdErr != nil {
|
|
return nil, db.recvdErr
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// GetDagServ returns the dagservice object this Helper is using
|
|
func (db *DagBuilderHelper) GetDagServ() ipld.DAGService {
|
|
return db.dserv
|
|
}
|
|
|
|
// GetCidBuilder returns the internal `cid.CidBuilder` set in the builder.
|
|
func (db *DagBuilderHelper) GetCidBuilder() cid.Builder {
|
|
return db.cidBuilder
|
|
}
|
|
|
|
// NewLeafNode creates a leaf node filled with data. If rawLeaves is
|
|
// defined then a raw leaf will be returned. Otherwise, it will create
|
|
// and return `FSNodeOverDag` with `fsNodeType`.
|
|
func (db *DagBuilderHelper) NewLeafNode(data []byte, fsNodeType pb.Data_DataType) (ipld.Node, error) {
|
|
if len(data) > BlockSizeLimit {
|
|
return nil, ErrSizeLimitExceeded
|
|
}
|
|
|
|
if db.rawLeaves {
|
|
// Encapsulate the data in a raw node.
|
|
if db.cidBuilder == nil {
|
|
return dag.NewRawNode(data), nil
|
|
}
|
|
rawnode, err := dag.NewRawNodeWPrefix(data, db.cidBuilder)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return rawnode, nil
|
|
}
|
|
|
|
// Encapsulate the data in UnixFS node (instead of a raw node).
|
|
fsNodeOverDag := db.NewFSNodeOverDag(fsNodeType)
|
|
fsNodeOverDag.SetFileData(data)
|
|
node, err := fsNodeOverDag.Commit()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// TODO: Encapsulate this sequence of calls into a function that
|
|
// just returns the final `ipld.Node` avoiding going through
|
|
// `FSNodeOverDag`.
|
|
|
|
return node, nil
|
|
}
|
|
|
|
// FillNodeLayer will add datanodes as children to the give node until
|
|
// it is full in this layer or no more data.
|
|
// NOTE: This function creates raw data nodes so it only works
|
|
// for the `trickle.Layout`.
|
|
func (db *DagBuilderHelper) FillNodeLayer(node *FSNodeOverDag) error {
|
|
|
|
// while we have room AND we're not done
|
|
for node.NumChildren() < db.maxlinks && !db.Done() {
|
|
child, childFileSize, err := db.NewLeafDataNode(ft.TRaw)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := node.AddChild(child, childFileSize, db); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
node.Commit()
|
|
// TODO: Do we need to commit here? The caller who created the
|
|
// `FSNodeOverDag` should be in charge of that.
|
|
|
|
return nil
|
|
}
|
|
|
|
// NewLeafDataNode builds the `node` with the data obtained from the
|
|
// Splitter with the given constraints (BlockSizeLimit, RawLeaves)
|
|
// specified when creating the DagBuilderHelper. It returns
|
|
// `ipld.Node` with the `dataSize` (that will be used to keep track of
|
|
// the DAG file size). The size of the data is computed here because
|
|
// after that it will be hidden by `NewLeafNode` inside a generic
|
|
// `ipld.Node` representation.
|
|
func (db *DagBuilderHelper) NewLeafDataNode(fsNodeType pb.Data_DataType) (node ipld.Node, dataSize uint64, err error) {
|
|
fileData, err := db.Next()
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
dataSize = uint64(len(fileData))
|
|
|
|
// Create a new leaf node containing the file chunk data.
|
|
node, err = db.NewLeafNode(fileData, fsNodeType)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
// Convert this leaf to a `FilestoreNode` if needed.
|
|
node = db.ProcessFileStore(node, dataSize)
|
|
|
|
return node, dataSize, nil
|
|
}
|
|
|
|
// ProcessFileStore generates, if Filestore is being used, the
|
|
// `FilestoreNode` representation of the `ipld.Node` that
|
|
// contains the file data. If Filestore is not being used just
|
|
// return the same node to continue with its addition to the DAG.
|
|
//
|
|
// The `db.offset` is updated at this point (instead of when
|
|
// `NewLeafDataNode` is called, both work in tandem but the
|
|
// offset is more related to this function).
|
|
func (db *DagBuilderHelper) ProcessFileStore(node ipld.Node, dataSize uint64) ipld.Node {
|
|
// Check if Filestore is being used.
|
|
if db.fullPath != "" {
|
|
// Check if the node is actually a raw node (needed for
|
|
// Filestore support).
|
|
if _, ok := node.(*dag.RawNode); ok {
|
|
fn := &pi.FilestoreNode{
|
|
Node: node,
|
|
PosInfo: &pi.PosInfo{
|
|
Offset: db.offset,
|
|
FullPath: db.fullPath,
|
|
Stat: db.stat,
|
|
},
|
|
}
|
|
|
|
// Update `offset` with the size of the data generated by `db.Next`.
|
|
db.offset += dataSize
|
|
|
|
return fn
|
|
}
|
|
}
|
|
|
|
// Filestore is not used, return the same `node` argument.
|
|
return node
|
|
}
|
|
|
|
// Add inserts the given node in the DAGService.
|
|
func (db *DagBuilderHelper) Add(node ipld.Node) error {
|
|
return db.dserv.Add(context.TODO(), node)
|
|
}
|
|
|
|
// Maxlinks returns the configured maximum number for links
|
|
// for nodes built with this helper.
|
|
func (db *DagBuilderHelper) Maxlinks() int {
|
|
return db.maxlinks
|
|
}
|
|
|
|
// FSNodeOverDag encapsulates an `unixfs.FSNode` that will be stored in a
|
|
// `dag.ProtoNode`. Instead of just having a single `ipld.Node` that
|
|
// would need to be constantly (un)packed to access and modify its
|
|
// internal `FSNode` in the process of creating a UnixFS DAG, this
|
|
// structure stores an `FSNode` cache to manipulate it (add child nodes)
|
|
// directly , and only when the node has reached its final (immutable) state
|
|
// (signaled by calling `Commit()`) is it committed to a single (indivisible)
|
|
// `ipld.Node`.
|
|
//
|
|
// It is used mainly for internal (non-leaf) nodes, and for some
|
|
// representations of data leaf nodes (that don't use raw nodes or
|
|
// Filestore).
|
|
//
|
|
// It aims to replace the `UnixfsNode` structure which encapsulated too
|
|
// many possible node state combinations.
|
|
//
|
|
// TODO: Revisit the name.
|
|
type FSNodeOverDag struct {
|
|
dag *dag.ProtoNode
|
|
file *ft.FSNode
|
|
}
|
|
|
|
// NewFSNodeOverDag creates a new `dag.ProtoNode` and `ft.FSNode`
|
|
// decoupled from one onther (and will continue in that way until
|
|
// `Commit` is called), with `fsNodeType` specifying the type of
|
|
// the UnixFS layer node (either `File` or `Raw`).
|
|
func (db *DagBuilderHelper) NewFSNodeOverDag(fsNodeType pb.Data_DataType) *FSNodeOverDag {
|
|
node := new(FSNodeOverDag)
|
|
node.dag = new(dag.ProtoNode)
|
|
node.dag.SetCidBuilder(db.GetCidBuilder())
|
|
|
|
node.file = ft.NewFSNode(fsNodeType)
|
|
|
|
return node
|
|
}
|
|
|
|
// NewFSNFromDag reconstructs a FSNodeOverDag node from a given dag node
|
|
func (db *DagBuilderHelper) NewFSNFromDag(nd *dag.ProtoNode) (*FSNodeOverDag, error) {
|
|
return NewFSNFromDag(nd)
|
|
}
|
|
|
|
// NewFSNFromDag reconstructs a FSNodeOverDag node from a given dag node
|
|
func NewFSNFromDag(nd *dag.ProtoNode) (*FSNodeOverDag, error) {
|
|
mb, err := ft.FSNodeFromBytes(nd.Data())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &FSNodeOverDag{
|
|
dag: nd,
|
|
file: mb,
|
|
}, nil
|
|
}
|
|
|
|
// AddChild adds a `child` `ipld.Node` to both node layers. The
|
|
// `dag.ProtoNode` creates a link to the child node while the
|
|
// `ft.FSNode` stores its file size (that is, not the size of the
|
|
// node but the size of the file data that it is storing at the
|
|
// UnixFS layer). The child is also stored in the `DAGService`.
|
|
func (n *FSNodeOverDag) AddChild(child ipld.Node, fileSize uint64, db *DagBuilderHelper) error {
|
|
err := n.dag.AddNodeLink("", child)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
n.file.AddBlockSize(fileSize)
|
|
|
|
return db.Add(child)
|
|
}
|
|
|
|
// RemoveChild deletes the child node at the given index.
|
|
func (n *FSNodeOverDag) RemoveChild(index int, dbh *DagBuilderHelper) {
|
|
n.file.RemoveBlockSize(index)
|
|
n.dag.SetLinks(append(n.dag.Links()[:index], n.dag.Links()[index+1:]...))
|
|
}
|
|
|
|
// Commit unifies (resolves) the cache nodes into a single `ipld.Node`
|
|
// that represents them: the `ft.FSNode` is encoded inside the
|
|
// `dag.ProtoNode`.
|
|
//
|
|
// TODO: Make it read-only after committing, allow to commit only once.
|
|
func (n *FSNodeOverDag) Commit() (ipld.Node, error) {
|
|
fileData, err := n.file.GetBytes()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
n.dag.SetData(fileData)
|
|
|
|
return n.dag, nil
|
|
}
|
|
|
|
// NumChildren returns the number of children of the `ft.FSNode`.
|
|
func (n *FSNodeOverDag) NumChildren() int {
|
|
return n.file.NumChildren()
|
|
}
|
|
|
|
// FileSize returns the `Filesize` attribute from the underlying
|
|
// representation of the `ft.FSNode`.
|
|
func (n *FSNodeOverDag) FileSize() uint64 {
|
|
return n.file.FileSize()
|
|
}
|
|
|
|
// SetFileData stores the `fileData` in the `ft.FSNode`. It
|
|
// should be used only when `FSNodeOverDag` represents a leaf
|
|
// node (internal nodes don't carry data, just file sizes).
|
|
func (n *FSNodeOverDag) SetFileData(fileData []byte) {
|
|
n.file.SetData(fileData)
|
|
}
|
|
|
|
// GetDagNode fills out the proper formatting for the FSNodeOverDag node
|
|
// inside of a DAG node and returns the dag node.
|
|
// TODO: Check if we have committed (passed the UnixFS information
|
|
// to the DAG layer) before returning this.
|
|
func (n *FSNodeOverDag) GetDagNode() (ipld.Node, error) {
|
|
return n.dag, nil
|
|
}
|
|
|
|
// GetChild gets the ith child of this node from the given DAGService.
|
|
func (n *FSNodeOverDag) GetChild(ctx context.Context, i int, ds ipld.DAGService) (*FSNodeOverDag, error) {
|
|
nd, err := n.dag.Links()[i].GetNode(ctx, ds)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pbn, ok := nd.(*dag.ProtoNode)
|
|
if !ok {
|
|
return nil, dag.ErrNotProtobuf
|
|
}
|
|
|
|
return NewFSNFromDag(pbn)
|
|
}
|