11package car
22
33import (
4+ "errors"
45 "fmt"
56 "io"
67
@@ -23,6 +24,7 @@ type BlockReader struct {
2324 // Used internally only, by BlockReader.Next during iteration over blocks.
2425 r io.Reader
2526 offset uint64
27+ v1offset uint64
2628 readerSize int64
2729 opts Options
2830}
@@ -80,7 +82,8 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) {
8082 if _ , err := rs .Seek (int64 (v2h .DataOffset )- PragmaSize - HeaderSize , io .SeekCurrent ); err != nil {
8183 return nil , err
8284 }
83- br .offset = uint64 (v2h .DataOffset )
85+ br .v1offset = uint64 (v2h .DataOffset )
86+ br .offset = br .v1offset
8487 br .readerSize = int64 (v2h .DataOffset + v2h .DataSize )
8588
8689 // Set br.r to a LimitReader reading from r limited to dataSize.
@@ -96,6 +99,8 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) {
9699 return nil , fmt .Errorf ("invalid data payload header version; expected 1, got %v" , header .Version )
97100 }
98101 br .Roots = header .Roots
102+ hs , _ := carv1 .HeaderSize (header )
103+ br .offset += hs
99104 default :
100105 // Otherwise, error out with invalid version since only versions 1 or 2 are expected.
101106 return nil , fmt .Errorf ("invalid car version: %d" , br .Version )
@@ -136,10 +141,22 @@ func (br *BlockReader) Next() (blocks.Block, error) {
136141 return blocks .NewBlockWithCid (data , c )
137142}
138143
144+ // BlockMetadata contains metadata about a block's section in a CAR file/stream.
145+ //
146+ // There are two offsets for the block data which will be the same if the
147+ // original CAR is a CARv1, but will differ if the original CAR is a CARv2. In
148+ // the case of a CARv2, SourceOffset will be the offset from the beginning of
149+ // the file/stream, and Offset will be the offset from the beginning of the CARv1
150+ // payload container within the CARv2.
151+ //
152+ // Offset is useful for index generation which requires an offset from the CARv1
153+ // payload; while SourceOffset is useful for direct block reads out of the
154+ // source file/stream regardless of version.
139155type BlockMetadata struct {
140156 cid.Cid
141- Offset uint64
142- Size uint64
157+ Offset uint64 // Offset of the block data in the container CARv1
158+ SourceOffset uint64 // SourceOffset is the offset of block data in the source file/stream
159+ Size uint64
143160}
144161
145162// SkipNext jumps over the next block, returning metadata about what it is (the CID, offset, and size).
@@ -148,24 +165,33 @@ type BlockMetadata struct {
148165// If the underlying reader used by the BlockReader is actually a ReadSeeker, this method will attempt to
149166// seek over the underlying data rather than reading it into memory.
150167func (br * BlockReader ) SkipNext () (* BlockMetadata , error ) {
151- sctSize , err := util .LdReadSize (br .r , br .opts .ZeroLengthSectionAsEOF , br .opts .MaxAllowedSectionSize )
168+ sectionSize , err := util .LdReadSize (br .r , br .opts .ZeroLengthSectionAsEOF , br .opts .MaxAllowedSectionSize )
152169 if err != nil {
153170 return nil , err
154171 }
155-
156- if sctSize == 0 {
157- _ , _ , err := cid .CidFromBytes ([]byte {})
172+ if sectionSize == 0 {
173+ _ , _ , err := cid .CidFromBytes ([]byte {}) // generate zero-byte CID error
174+ if err == nil {
175+ panic ("expected zero-byte CID error" )
176+ }
158177 return nil , err
159178 }
160179
161- cidSize , c , err := cid .CidFromReader (io .LimitReader (br .r , int64 (sctSize )))
180+ lenSize := uint64 (varint .UvarintSize (sectionSize ))
181+
182+ cidSize , c , err := cid .CidFromReader (io .LimitReader (br .r , int64 (sectionSize )))
162183 if err != nil {
163184 return nil , err
164185 }
165186
166- blkSize := sctSize - uint64 (cidSize )
187+ blockSize := sectionSize - uint64 (cidSize )
188+ blockOffset := br .offset + lenSize + uint64 (cidSize )
189+
190+ // move our reader forward; either by seeking or slurping
191+
167192 if brs , ok := br .r .(io.ReadSeeker ); ok {
168- // carv1 and we don't know the size, so work it out and cache it
193+ // carv1 and we don't know the size, so work it out and cache it so we
194+ // can use it to determine over-reads
169195 if br .readerSize == - 1 {
170196 cur , err := brs .Seek (0 , io .SeekCurrent )
171197 if err != nil {
@@ -180,42 +206,37 @@ func (br *BlockReader) SkipNext() (*BlockMetadata, error) {
180206 return nil , err
181207 }
182208 }
183- // seek.
184- finalOffset , err := brs .Seek (int64 (blkSize ), io .SeekCurrent )
209+
210+ // seek forward past the block data
211+ finalOffset , err := brs .Seek (int64 (blockSize ), io .SeekCurrent )
185212 if err != nil {
186213 return nil , err
187214 }
188- if finalOffset != int64 (br .offset )+ int64 (sctSize )+ int64 (varint . UvarintSize ( sctSize ) ) {
189- return nil , fmt . Errorf ("unexpected length" )
215+ if finalOffset != int64 (br .offset )+ int64 (lenSize )+ int64 (sectionSize ) {
216+ return nil , errors . New ("unexpected length" )
190217 }
191218 if finalOffset > br .readerSize {
192219 return nil , io .ErrUnexpectedEOF
193220 }
194- br .offset = uint64 (finalOffset )
195- return & BlockMetadata {
196- c ,
197- uint64 (finalOffset ) - sctSize - uint64 (varint .UvarintSize (sctSize )),
198- blkSize ,
199- }, nil
200- }
201-
202- // read to end.
203- readCnt , err := io .CopyN (io .Discard , br .r , int64 (blkSize ))
204- if err != nil {
205- if err == io .EOF {
206- return nil , io .ErrUnexpectedEOF
221+ } else { // just a reader, we need to slurp the block bytes
222+ readCnt , err := io .CopyN (io .Discard , br .r , int64 (blockSize ))
223+ if err != nil {
224+ if err == io .EOF {
225+ return nil , io .ErrUnexpectedEOF
226+ }
227+ return nil , err
228+ }
229+ if readCnt != int64 (blockSize ) {
230+ return nil , errors .New ("unexpected length" )
207231 }
208- return nil , err
209- }
210- if readCnt != int64 (blkSize ) {
211- return nil , fmt .Errorf ("unexpected length" )
212232 }
213- origOffset := br . offset
214- br .offset += uint64 ( varint . UvarintSize ( sctSize )) + sctSize
233+
234+ br .offset = blockOffset + blockSize
215235
216236 return & BlockMetadata {
217- c ,
218- origOffset ,
219- blkSize ,
237+ Cid : c ,
238+ Offset : blockOffset - br .v1offset ,
239+ SourceOffset : blockOffset ,
240+ Size : blockSize ,
220241 }, nil
221242}
0 commit comments