chainstore: Fix raw blocks getting scanned for links during snapshots (#10684)

We have to save raw blocks to the snapshot, but we should not be scanning them
for additional links as if they were CBOR blocks.

This cleans the logic a bit (we were checking that the parent was a CBOR block
before queueing up the children, but then scanning the children... it was weird).

Additionally, more verbose logging is added for the next time ScanForLinks
fails (currently very little info was given).

Our ScanForLinks callback should only enqueue CBOR for further processing.
This commit is contained in:
Hector Sanjuan 2023-04-22 00:16:26 +02:00 committed by GitHub
parent 9d5d6a5f3d
commit 875c09840b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -371,6 +371,9 @@ func (s *walkScheduler) enqueueIfNew(task walkTask) {
//log.Infow("ignored", "cid", todo.c.String())
return
}
// This lets through RAW and CBOR blocks, the only two types that we
// end up writing to the exported CAR.
if task.c.Prefix().Codec != cid.Raw && task.c.Prefix().Codec != cid.DagCBOR {
//log.Infow("ignored", "cid", todo.c.String())
return
@ -442,10 +445,19 @@ func (s *walkScheduler) processTask(t walkTask, workerN int) error {
b: blk,
}
// We exported the ipld block. If it wasn't a CBOR block, there's nothing
// else to do and we can bail out early as it won't have any links
// etc.
if t.c.Prefix().Codec != cid.DagCBOR || t.c.Prefix().MhType == mh.IDENTITY {
return nil
}
rawData := blk.RawData()
// extract relevant dags to walk from the block
if t.taskType == blockTask {
var b types.BlockHeader
if err := b.UnmarshalCBOR(bytes.NewBuffer(blk.RawData())); err != nil {
if err := b.UnmarshalCBOR(bytes.NewBuffer(rawData)); err != nil {
return xerrors.Errorf("unmarshalling block header (cid=%s): %w", blk, err)
}
if b.Height%1_000 == 0 {
@ -521,11 +533,7 @@ func (s *walkScheduler) processTask(t walkTask, workerN int) error {
}
// Not a chain-block: we scan for CIDs in the raw block-data
return cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) {
if t.c.Prefix().Codec != cid.DagCBOR || t.c.Prefix().MhType == mh.IDENTITY {
return
}
err = cbg.ScanForLinks(bytes.NewReader(rawData), func(c cid.Cid) {
s.enqueueIfNew(walkTask{
c: c,
taskType: dagTask,
@ -534,6 +542,13 @@ func (s *walkScheduler) processTask(t walkTask, workerN int) error {
epoch: t.epoch,
})
})
if err != nil {
return xerrors.Errorf(
"ScanForLinks(%s). Task: %s. Block: %s (%s). Epoch: %d. Err: %w",
t.c, t.taskType, t.topLevelTaskType, t.blockCid, t.epoch, err)
}
return nil
}
func (cs *ChainStore) ExportRange(