Skip to content

Commit 02d658f

Browse files
committed
feat: Has() and Get() will respect StoreIdentityCIDs option
When StoreIdentityCIDs is set, it will defer to the index to check whether the blocks are in the CAR. When the CAR is a v1 and StoreIdentityCIDs is set, the index will contain the identity CIDs. When it's a v2 with an existing index, however that index was created will determine whether the identity CIDs are that are in the CAR are found. When StoreIdentityCIDs is not set, Has() will always return true and Get() will always return the block.
1 parent 1478bbd commit 02d658f

File tree

4 files changed

+80
-20
lines changed

4 files changed

+80
-20
lines changed

v2/blockstore/readonly.go

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,23 @@ func (b *ReadOnly) DeleteBlock(_ context.Context, _ cid.Cid) error {
217217
}
218218

219219
// Has indicates if the store contains a block that corresponds to the given key.
220-
// This function always returns true for any given key with multihash.IDENTITY code.
220+
// This function always returns true for any given key with multihash.IDENTITY
221+
// code unless the StoreIdentityCIDs option is on, in which case it will defer
222+
// to the index to check for the existence of the block; the index may or may
223+
// not contain identity CIDs included in this CAR, depending on whether
224+
// StoreIdentityCIDs was on when the index was created. If the CAR is a CARv1
225+
// and StoreIdentityCIDs is on, then the index will contain identity CIDs and
226+
// this will always return true.
221227
func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) {
222-
// Check if the given CID has multihash.IDENTITY code
223-
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
224-
if _, ok, err := isIdentity(key); err != nil {
225-
return false, err
226-
} else if ok {
227-
return true, nil
228+
if !b.opts.StoreIdentityCIDs {
229+
// If we don't store identity CIDs then we can return them straight away as if they are here,
230+
// otherwise we need to check for their existence.
231+
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
232+
if _, ok, err := isIdentity(key); err != nil {
233+
return false, err
234+
} else if ok {
235+
return true, nil
236+
}
228237
}
229238

230239
b.mu.RLock()
@@ -269,14 +278,23 @@ func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) {
269278
}
270279

271280
// Get gets a block corresponding to the given key.
272-
// This API will always return true if the given key has multihash.IDENTITY code.
281+
// This function always returns the block for any given key with
282+
// multihash.IDENTITY code unless the StoreIdentityCIDs option is on, in which
283+
// case it will defer to the index to check for the existence of the block; the
284+
// index may or may not contain identity CIDs included in this CAR, depending on
285+
// whether StoreIdentityCIDs was on when the index was created. If the CAR is a
286+
// CARv1 and StoreIdentityCIDs is on, then the index will contain identity CIDs
287+
// and this will always return true.
273288
func (b *ReadOnly) Get(ctx context.Context, key cid.Cid) (blocks.Block, error) {
274-
// Check if the given CID has multihash.IDENTITY code
275-
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
276-
if digest, ok, err := isIdentity(key); err != nil {
277-
return nil, err
278-
} else if ok {
279-
return blocks.NewBlockWithCid(digest, key)
289+
if !b.opts.StoreIdentityCIDs {
290+
// If we don't store identity CIDs then we can return them straight away as if they are here,
291+
// otherwise we need to check for their existence.
292+
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
293+
if digest, ok, err := isIdentity(key); err != nil {
294+
return nil, err
295+
} else if ok {
296+
return blocks.NewBlockWithCid(digest, key)
297+
}
280298
}
281299

282300
b.mu.RLock()

v2/blockstore/readonly_test.go

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/ipfs/go-merkledag"
1515
carv2 "github.com/ipld/go-car/v2"
1616
"github.com/ipld/go-car/v2/internal/carv1"
17+
"github.com/multiformats/go-multicodec"
1718
"github.com/stretchr/testify/require"
1819
)
1920

@@ -33,31 +34,53 @@ func TestReadOnly(t *testing.T) {
3334
name string
3435
v1OrV2path string
3536
opts []carv2.Option
37+
noIdCids bool
3638
}{
3739
{
3840
"OpenedWithCarV1",
3941
"../testdata/sample-v1.car",
4042
[]carv2.Option{UseWholeCIDs(true), carv2.StoreIdentityCIDs(true)},
43+
// index is made, but identity CIDs are included so they'll be found
44+
false,
45+
},
46+
{
47+
"OpenedWithCarV1_NoIdentityCID",
48+
"../testdata/sample-v1.car",
49+
[]carv2.Option{UseWholeCIDs(true)},
50+
// index is made, identity CIDs are not included, but we always short-circuit when StoreIdentityCIDs(false)
51+
false,
4152
},
4253
{
4354
"OpenedWithCarV2",
4455
"../testdata/sample-wrapped-v2.car",
4556
[]carv2.Option{UseWholeCIDs(true), carv2.StoreIdentityCIDs(true)},
57+
// index already exists, but was made without identity CIDs, but opening with StoreIdentityCIDs(true) means we check the index
58+
true,
59+
},
60+
{
61+
"OpenedWithCarV2_NoIdentityCID",
62+
"../testdata/sample-wrapped-v2.car",
63+
[]carv2.Option{UseWholeCIDs(true)},
64+
// index already exists, it was made without identity CIDs, but we always short-circuit when StoreIdentityCIDs(false)
65+
false,
4666
},
4767
{
4868
"OpenedWithCarV1ZeroLenSection",
4969
"../testdata/sample-v1-with-zero-len-section.car",
5070
[]carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)},
71+
false,
5172
},
5273
{
5374
"OpenedWithAnotherCarV1ZeroLenSection",
5475
"../testdata/sample-v1-with-zero-len-section2.car",
5576
[]carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)},
77+
false,
5678
},
5779
}
5880
for _, tt := range tests {
5981
t.Run(tt.name, func(t *testing.T) {
6082
ctx := context.TODO()
83+
6184
subject, err := OpenReadOnly(tt.v1OrV2path, tt.opts...)
6285
require.NoError(t, err)
6386
t.Cleanup(func() { require.NoError(t, subject.Close()) })
@@ -89,7 +112,13 @@ func TestReadOnly(t *testing.T) {
89112
// Assert blockstore contains key.
90113
has, err := subject.Has(ctx, key)
91114
require.NoError(t, err)
92-
require.True(t, has)
115+
if key.Prefix().MhType == uint64(multicodec.Identity) && tt.noIdCids {
116+
// fixture wasn't made with StoreIdentityCIDs, but we opened it with StoreIdentityCIDs,
117+
// so they aren't there to find
118+
require.False(t, has)
119+
} else {
120+
require.True(t, has)
121+
}
93122

94123
// Assert size matches block raw data length.
95124
gotSize, err := subject.GetSize(ctx, key)
@@ -98,9 +127,11 @@ func TestReadOnly(t *testing.T) {
98127
require.Equal(t, wantSize, gotSize)
99128

100129
// Assert block itself matches v1 payload block.
101-
gotBlock, err := subject.Get(ctx, key)
102-
require.NoError(t, err)
103-
require.Equal(t, wantBlock, gotBlock)
130+
if has {
131+
gotBlock, err := subject.Get(ctx, key)
132+
require.NoError(t, err)
133+
require.Equal(t, wantBlock, gotBlock)
134+
}
104135

105136
// Assert write operations error
106137
require.Error(t, subject.Put(ctx, wantBlock))

v2/index_gen.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ func GenerateIndex(v1r io.Reader, opts ...Option) (index.Index, error) {
3434
// LoadIndex populates idx with index records generated from r.
3535
// The r may be in CARv1 or CARv2 format.
3636
//
37+
// If the StoreIdentityCIDs option is set when calling LoadIndex, identity
38+
// CIDs will be included in the index. By default this option is off, and
39+
// identity CIDs will not be included in the index.
40+
//
3741
// Note, the index is re-generated every time even if r is in CARv2 format and already has an index.
3842
// To read existing index when available see ReadOrGenerateIndex.
3943
func LoadIndex(idx index.Index, r io.Reader, opts ...Option) error {

v2/options.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,15 @@ func WithoutIndex() Option {
127127

128128
// StoreIdentityCIDs sets whether to persist sections that are referenced by
129129
// CIDs with multihash.IDENTITY digest.
130-
// When writing CAR files with this option,
131-
// Characteristics.IsFullyIndexed will be set.
130+
// When writing CAR files with this option, Characteristics.IsFullyIndexed will
131+
// be set.
132+
//
133+
// By default, the blockstore interface will always return true for Has() called
134+
// with identity CIDs, but when this option is turned on, it will defer to the
135+
// index.
136+
//
137+
// When creating an index (or loading a CARv1 as a blockstore), when this option
138+
// is on, identity CIDs will be included in the index.
132139
//
133140
// This option is disabled by default.
134141
func StoreIdentityCIDs(b bool) Option {

0 commit comments

Comments
 (0)