Skip to content

Commit 08a8582

Browse files
authored
feat: add NextReader to BlockReader (#603)
* Add NextReader to BlockReader. BlockReader.NextReader facilitates reading larger blocks from the CAR file. Signed-off-by: Jakub Sztandera <oss@kubuxu.com>
1 parent 3272f9c commit 08a8582

File tree

3 files changed

+67
-15
lines changed

3 files changed

+67
-15
lines changed

v2/block_reader.go

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"io"
7+
"math"
78

89
blocks "github.com/ipfs/go-block-format"
910
"github.com/ipfs/go-cid"
@@ -141,6 +142,25 @@ func (br *BlockReader) Next() (blocks.Block, error) {
141142
return blocks.NewBlockWithCid(data, c)
142143
}
143144

145+
// NextReader returns the CID, an io.Reader, and the length of what should be the next block.
146+
// The CID itself is not verified, and the reader is limited to the size of the block.
147+
// The caller MUST consume all of the bytes in the returned reader before calling any other method
148+
// on the BlockReader.
149+
// The returned length may be larger than MaxAllowedSectionSize; it is up to the caller to check it before loading the data into memory.
150+
func (br *BlockReader) NextReader() (cid.Cid, io.Reader, uint64, error) {
151+
// We pass math.MaxInt64 because io.LimitReader doesn't support uint64,
152+
// and we want an effectively unlimited size: it is up to the caller to read the block data without OOMing.
153+
c, length, err := util.ReadNodeHeader(br.r, br.opts.ZeroLengthSectionAsEOF, math.MaxInt64)
154+
if err != nil {
155+
return cid.Undef, nil, 0, err
156+
}
157+
limitReader := io.LimitReader(br.r, int64(length))
158+
159+
ss := uint64(c.ByteLen()) + length
160+
br.offset += uint64(varint.UvarintSize(ss)) + ss
161+
return c, limitReader, length, nil
162+
}
163+
144164
// BlockMetadata contains metadata about a block's section in a CAR file/stream.
145165
//
146166
// There are two offsets for the block section which will be the same if the
@@ -171,26 +191,14 @@ type BlockMetadata struct {
171191
// If the underlying reader used by the BlockReader is actually a ReadSeeker, this method will attempt to
172192
// seek over the underlying data rather than reading it into memory.
173193
func (br *BlockReader) SkipNext() (*BlockMetadata, error) {
174-
sectionSize, err := util.LdReadSize(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize)
194+
c, blockSize, err := util.ReadNodeHeader(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize)
175195
if err != nil {
176196
return nil, err
177197
}
178-
if sectionSize == 0 {
179-
_, _, err := cid.CidFromBytes([]byte{}) // generate zero-byte CID error
180-
if err == nil {
181-
panic("expected zero-byte CID error")
182-
}
183-
return nil, err
184-
}
185-
198+
cidSize := uint64(c.ByteLen())
199+
sectionSize := blockSize + cidSize
186200
lenSize := uint64(varint.UvarintSize(sectionSize))
187201

188-
cidSize, c, err := cid.CidFromReader(io.LimitReader(br.r, int64(sectionSize)))
189-
if err != nil {
190-
return nil, err
191-
}
192-
193-
blockSize := sectionSize - uint64(cidSize)
194202
blockOffset := br.offset
195203

196204
// move our reader forward; either by seeking or slurping

v2/block_reader_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,23 @@ func TestBlockReader(t *testing.T) {
352352
}
353353
_, err = car.Next()
354354
req.ErrorIs(err, io.EOF)
355+
356+
car, err = carv2.NewBlockReader(testCase.reader())
357+
req.NoError(err)
358+
req.ElementsMatch(roots, car.Roots)
359+
360+
for i := 0; i < 100; i++ {
361+
cid, r, length, err := car.NextReader()
362+
req.NoError(err)
363+
req.Equal(blks[i].block.Cid(), cid)
364+
req.Equal(uint64(len(blks[i].block.RawData())), length)
365+
data := make([]byte, length)
366+
_, err = io.ReadFull(r, data)
367+
req.NoError(err)
368+
req.Equal(blks[i].block.RawData(), data)
369+
}
370+
_, err = car.Next()
371+
req.ErrorIs(err, io.EOF)
355372
})
356373
}
357374
}

v2/internal/carv1/util/util.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package util
33
import (
44
"errors"
55
"io"
6+
"math"
67

78
internalio "github.com/ipld/go-car/v2/internal/io"
89

@@ -33,6 +34,32 @@ func ReadNode(r io.Reader, zeroLenAsEOF bool, maxReadBytes uint64) (cid.Cid, []b
3334
return c, data[n:], nil
3435
}
3536

37+
// ReadNodeHeader reads a section's length prefix and CID from r and returns the CID together with the number of data bytes that remain to be read.
38+
func ReadNodeHeader(r io.Reader, zeroLenAsEOF bool, maxReadBytes uint64) (cid.Cid, uint64, error) {
39+
maxReadBytes = min(maxReadBytes, math.MaxInt64) // io.LimitReader doesn't support uint64
40+
41+
size, err := LdReadSize(r, zeroLenAsEOF, maxReadBytes)
42+
if err != nil {
43+
return cid.Cid{}, 0, err
44+
}
45+
46+
if size == 0 {
47+
_, _, err := cid.CidFromBytes([]byte{}) // generate zero-byte CID error
48+
if err == nil {
49+
panic("expected zero-byte CID error")
50+
}
51+
return cid.Undef, 0, err
52+
}
53+
54+
limitReader := io.LimitReader(r, int64(size)) // safe due to the `min` above
55+
n, c, err := cid.CidFromReader(limitReader)
56+
if err != nil {
57+
return cid.Cid{}, 0, err
58+
}
59+
60+
return c, size - uint64(n), nil
61+
}
62+
3663
func LdWrite(w io.Writer, d ...[]byte) error {
3764
var sum uint64
3865
for _, s := range d {

0 commit comments

Comments
 (0)