Skip to content

Commit d51b4a1

Browse files
MichaelMure authored and rvagg committed
ReadWrite: faster Has() by using the in-memory index instead of reading on disk
Before:
// BenchmarkHas-8 190216 6368 ns/op 744 B/op 16 allocs/op

After:
// BenchmarkHas-8 1419169 845.6 ns/op 320 B/op 6 allocs/op

```
func BenchmarkHas(b *testing.B) {
	ctx := context.TODO()
	path := filepath.Join(b.TempDir(), "bench-large-v2.car")
	generateRandomCarV2File(b, path, 200<<20) // 200 MiB
	defer os.Remove(path)

	subject, err := blockstore.OpenReadWrite(path, nil)

	c, err := subject.AllKeysChan(ctx)
	require.NoError(b, err)

	var allCids []cid.Cid
	for c2 := range c {
		allCids = append(allCids, c2)
	}

	b.ReportAllocs()
	b.ResetTimer()

	var idx int
	for i := 0; i < b.N; i++ {
		_, _ = subject.Has(ctx, allCids[idx])
		// require.NoError(b, err)
		// require.True(b, has)
		idx = (idx + 1) % len(allCids)
	}
}
```
1 parent 649ff2a commit d51b4a1

File tree

4 files changed

+60
-9
lines changed

4 files changed

+60
-9
lines changed

v2/blockstore/readwrite.go

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -344,7 +344,29 @@ func (b *ReadWrite) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
344344
}
345345

346346
func (b *ReadWrite) Has(ctx context.Context, key cid.Cid) (bool, error) {
347-
return b.ronly.Has(ctx, key)
347+
if !b.opts.StoreIdentityCIDs {
348+
// If we don't store identity CIDs then we can return them straight away as if they are here,
349+
// otherwise we need to check for their existence.
350+
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
351+
if _, ok, err := store.IsIdentity(key); err != nil {
352+
return false, err
353+
} else if ok {
354+
return true, nil
355+
}
356+
}
357+
358+
if ctx.Err() != nil {
359+
return false, ctx.Err()
360+
}
361+
362+
b.ronly.mu.Lock()
363+
defer b.ronly.mu.Unlock()
364+
365+
if b.ronly.closed {
366+
return false, errClosed
367+
}
368+
369+
return b.idx.HasMultihash(key.Hash())
348370
}
349371

350372
func (b *ReadWrite) Get(ctx context.Context, key cid.Cid) (blocks.Block, error) {

v2/index/index.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ import (
1313
"github.com/multiformats/go-varint"
1414
)
1515

16-
// CarIndexNone is a sentinal value used as a multicodec code for the index indicating no index.
16+
// CarIndexNone is a sentinel value used as a multicodec code for the index indicating no index.
1717
const CarIndexNone = 0x300000
1818

1919
type (
@@ -46,7 +46,7 @@ type (
4646
// Unmarshal decodes the index from its serial form.
4747
// Note, this function will copy the entire index into memory.
4848
//
49-
// Do not unmarshal index from untrusted CARv2 files. Instead the index should be
49+
// Do not unmarshal index from untrusted CARv2 files. Instead, the index should be
5050
// regenerated from the CARv2 data payload.
5151
Unmarshal(r io.Reader) error
5252

@@ -84,7 +84,7 @@ type (
8484
// and the ForEach function returns the error to the user.
8585
//
8686
// An index may contain multiple offsets corresponding to the same multihash, e.g. via duplicate blocks.
87-
// In such cases, the given function may be called multiple times with the same multhihash but different offset.
87+
// In such cases, the given function may be called multiple times with the same multihash but different offset.
8888
//
8989
// The order of calls to the given function is deterministic, but entirely index-specific.
9090
ForEach(func(multihash.Multihash, uint64) error) error

v2/internal/store/insertionindex.go

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -212,10 +212,10 @@ func (ii *InsertionIndex) Flatten(codec multicodec.Code) (index.Index, error) {
212212
// but it's separate as it allows us to compare Record.Cid directly,
213213
// whereas GetAll just provides Record.Offset.
214214

215-
func (ii *InsertionIndex) HasExactCID(c cid.Cid) bool {
215+
func (ii *InsertionIndex) HasExactCID(c cid.Cid) (bool, error) {
216216
d, err := multihash.Decode(c.Hash())
217217
if err != nil {
218-
panic(err)
218+
return false, err
219219
}
220220
entry := recordDigest{digest: d.Digest}
221221

@@ -235,5 +235,30 @@ func (ii *InsertionIndex) HasExactCID(c cid.Cid) bool {
235235
return true
236236
}
237237
ii.items.AscendGreaterOrEqual(entry, iter)
238-
return found
238+
return found, nil
239+
}
240+
241+
func (ii *InsertionIndex) HasMultihash(mh multihash.Multihash) (bool, error) {
242+
d, err := multihash.Decode(mh)
243+
if err != nil {
244+
return false, err
245+
}
246+
entry := recordDigest{digest: d.Digest}
247+
248+
found := false
249+
iter := func(i llrb.Item) bool {
250+
existing := i.(recordDigest)
251+
if !bytes.Equal(existing.digest, entry.digest) {
252+
// We've already looked at all entries with matching digests.
253+
return false
254+
}
255+
if bytes.Equal(existing.Record.Cid.Hash(), mh) {
256+
found = true
257+
return false
258+
}
259+
// Continue looking in ascending order.
260+
return true
261+
}
262+
ii.items.AscendGreaterOrEqual(entry, iter)
263+
return found, nil
239264
}

v2/internal/store/put.go

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,12 @@ func ShouldPut(
3838
}
3939

4040
if !blockstoreAllowDuplicatePuts {
41-
if blockstoreUseWholeCIDs && idx.HasExactCID(c) {
42-
return false, nil // deduplicated by CID
41+
if blockstoreUseWholeCIDs {
42+
has, err := idx.HasExactCID(c)
43+
if err != nil {
44+
return false, err
45+
}
46+
return !has, nil // deduplicated by CID
4347
}
4448
if !blockstoreUseWholeCIDs {
4549
_, err := idx.Get(c)

0 commit comments

Comments
 (0)