Skip to content

Commit c93f536

Browse files
authored
Merge pull request #251 from ipld/rvagg/selective-traversal-options
Expose selector traversal options for SelectiveCar
2 parents f437812 + 1192236 commit c93f536

File tree

7 files changed

+333
-112
lines changed

7 files changed

+333
-112
lines changed

car_test.go

Lines changed: 7 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package car
1+
package car_test
22

33
import (
44
"bytes"
@@ -12,10 +12,7 @@ import (
1212
format "github.com/ipfs/go-ipld-format"
1313
"github.com/ipfs/go-merkledag"
1414
dstest "github.com/ipfs/go-merkledag/test"
15-
basicnode "github.com/ipld/go-ipld-prime/node/basic"
16-
"github.com/ipld/go-ipld-prime/traversal/selector"
17-
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
18-
"github.com/stretchr/testify/require"
15+
car "github.com/ipld/go-car"
1916
)
2017

2118
func assertAddNodes(t *testing.T, ds format.DAGService, nds ...format.Node) {
@@ -46,12 +43,12 @@ func TestRoundtrip(t *testing.T) {
4643
assertAddNodes(t, dserv, a, b, c, nd1, nd2, nd3)
4744

4845
buf := new(bytes.Buffer)
49-
if err := WriteCar(context.Background(), dserv, []cid.Cid{nd3.Cid()}, buf); err != nil {
46+
if err := car.WriteCar(context.Background(), dserv, []cid.Cid{nd3.Cid()}, buf); err != nil {
5047
t.Fatal(err)
5148
}
5249

5350
bserv := dstest.Bserv()
54-
ch, err := LoadCar(bserv.Blockstore(), buf)
51+
ch, err := car.LoadCar(bserv.Blockstore(), buf)
5552
if err != nil {
5653
t.Fatal(err)
5754
}
@@ -77,111 +74,15 @@ func TestRoundtrip(t *testing.T) {
7774
}
7875
}
7976

80-
func TestRoundtripSelective(t *testing.T) {
81-
sourceBserv := dstest.Bserv()
82-
sourceBs := sourceBserv.Blockstore()
83-
dserv := merkledag.NewDAGService(sourceBserv)
84-
a := merkledag.NewRawNode([]byte("aaaa"))
85-
b := merkledag.NewRawNode([]byte("bbbb"))
86-
c := merkledag.NewRawNode([]byte("cccc"))
87-
88-
nd1 := &merkledag.ProtoNode{}
89-
nd1.AddNodeLink("cat", a)
90-
91-
nd2 := &merkledag.ProtoNode{}
92-
nd2.AddNodeLink("first", nd1)
93-
nd2.AddNodeLink("dog", b)
94-
nd2.AddNodeLink("repeat", nd1)
95-
96-
nd3 := &merkledag.ProtoNode{}
97-
nd3.AddNodeLink("second", nd2)
98-
nd3.AddNodeLink("bear", c)
99-
100-
assertAddNodes(t, dserv, a, b, c, nd1, nd2, nd3)
101-
102-
ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
103-
104-
// the graph assembled above looks as follows, in order:
105-
// nd3 -> [c, nd2 -> [nd1 -> a, b, nd1 -> a]]
106-
// this selector starts at nd3, and traverses a link at index 1 (nd2, the second link, zero indexed)
107-
// it then recursively traverses all of its children
108-
// the only node skipped is 'c' -- link at index 0 immediately below nd3
109-
// the purpose is simply to show we are not writing the entire merkledag underneath
110-
// nd3
111-
selector := ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
112-
efsb.Insert("Links",
113-
ssb.ExploreIndex(1, ssb.ExploreRecursive(selector.RecursionLimitNone(), ssb.ExploreAll(ssb.ExploreRecursiveEdge()))))
114-
}).Node()
115-
116-
sc := NewSelectiveCar(context.Background(), sourceBs, []Dag{{Root: nd3.Cid(), Selector: selector}})
117-
118-
// write car in one step
119-
buf := new(bytes.Buffer)
120-
blockCount := 0
121-
var oneStepBlocks []Block
122-
err := sc.Write(buf, func(block Block) error {
123-
oneStepBlocks = append(oneStepBlocks, block)
124-
blockCount++
125-
return nil
126-
})
127-
require.Equal(t, blockCount, 5)
128-
require.NoError(t, err)
129-
130-
// create a new builder for two-step write
131-
sc2 := NewSelectiveCar(context.Background(), sourceBs, []Dag{{Root: nd3.Cid(), Selector: selector}})
132-
133-
// write car in two steps
134-
var twoStepBlocks []Block
135-
scp, err := sc2.Prepare(func(block Block) error {
136-
twoStepBlocks = append(twoStepBlocks, block)
137-
return nil
138-
})
139-
require.NoError(t, err)
140-
buf2 := new(bytes.Buffer)
141-
err = scp.Dump(buf2)
142-
require.NoError(t, err)
143-
144-
// verify preparation step correctly assessed length and blocks
145-
require.Equal(t, scp.Size(), uint64(buf.Len()))
146-
require.Equal(t, len(scp.Cids()), blockCount)
147-
148-
// verify equal data written by both methods
149-
require.Equal(t, buf.Bytes(), buf2.Bytes())
150-
151-
// verify equal blocks were passed to user block hook funcs
152-
require.Equal(t, oneStepBlocks, twoStepBlocks)
153-
154-
// readout car and verify contents
155-
bserv := dstest.Bserv()
156-
ch, err := LoadCar(bserv.Blockstore(), buf)
157-
require.NoError(t, err)
158-
require.Equal(t, len(ch.Roots), 1)
159-
160-
require.True(t, ch.Roots[0].Equals(nd3.Cid()))
161-
162-
bs := bserv.Blockstore()
163-
for _, nd := range []format.Node{a, b, nd1, nd2, nd3} {
164-
has, err := bs.Has(nd.Cid())
165-
require.NoError(t, err)
166-
require.True(t, has)
167-
}
168-
169-
for _, nd := range []format.Node{c} {
170-
has, err := bs.Has(nd.Cid())
171-
require.NoError(t, err)
172-
require.False(t, has)
173-
}
174-
}
175-
17677
func TestEOFHandling(t *testing.T) {
17778
// fixture is a clean single-block, single-root CAR
17879
fixture, err := hex.DecodeString("3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5")
17980
if err != nil {
18081
t.Fatal(err)
18182
}
18283

183-
load := func(t *testing.T, byts []byte) *CarReader {
184-
cr, err := NewCarReader(bytes.NewReader(byts))
84+
load := func(t *testing.T, byts []byte) *car.CarReader {
85+
cr, err := car.NewCarReader(bytes.NewReader(byts))
18586
if err != nil {
18687
t.Fatal(err)
18788
}
@@ -294,7 +195,7 @@ func TestBadHeaders(t *testing.T) {
294195
if err != nil {
295196
t.Fatal(err)
296197
}
297-
_, err = NewCarReader(bytes.NewReader(fixture))
198+
_, err = car.NewCarReader(bytes.NewReader(fixture))
298199
return err
299200
}
300201

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ require (
77
github.com/ipfs/go-ipld-format v0.2.0
88
github.com/ipfs/go-merkledag v0.3.2
99
github.com/ipld/go-codec-dagpb v1.2.0
10-
github.com/ipld/go-ipld-prime v0.9.0
10+
github.com/ipld/go-ipld-prime v0.12.3-0.20210930132912-0b3aef3ca569
1111
github.com/multiformats/go-multihash v0.0.15
1212
github.com/stretchr/testify v1.7.0
1313
)

go.sum

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2
116116
github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0=
117117
github.com/ipld/go-codec-dagpb v1.2.0 h1:2umV7ud8HBMkRuJgd8gXw95cLhwmcYrihS3cQEy9zpI=
118118
github.com/ipld/go-codec-dagpb v1.2.0/go.mod h1:6nBN7X7h8EOsEejZGqC7tej5drsdBAXbMHyBT+Fne5s=
119-
github.com/ipld/go-ipld-prime v0.9.0 h1:N2OjJMb+fhyFPwPnVvJcWU/NsumP8etal+d2v3G4eww=
120119
github.com/ipld/go-ipld-prime v0.9.0/go.mod h1:KvBLMr4PX1gWptgkzRjVZCrLmSGcZCb/jioOQwCqZN8=
120+
github.com/ipld/go-ipld-prime v0.12.3-0.20210930132912-0b3aef3ca569 h1:UDHkozLpTefhQzyu/2BWVRvsFHjhzvL387KsfFqE1vc=
121+
github.com/ipld/go-ipld-prime v0.12.3-0.20210930132912-0b3aef3ca569/go.mod h1:PaeLYq8k6dJLmDUSLrzkEpoGV4PEfe/1OtFN/eALOc8=
121122
github.com/jackpal/gateway v1.0.5 h1:qzXWUJfuMdlLMtt0a3Dgt+xkWQiA5itDEITVJtuSwMc=
122123
github.com/jackpal/gateway v1.0.5/go.mod h1:lTpwd4ACLXmpyiCTRtfiNyVnUmqT9RivzCDQetPfnjA=
123124
github.com/jackpal/go-nat-pmp v1.0.1 h1:i0LektDkO1QlrTm/cSuP+PyBCDnYvjPLGl4LdWEMiaA=
@@ -237,6 +238,8 @@ github.com/multiformats/go-multiaddr-net v0.0.1/go.mod h1:nw6HSxNmCIQH27XPGBuX+d
237238
github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs=
238239
github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk=
239240
github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc=
241+
github.com/multiformats/go-multicodec v0.3.0 h1:tstDwfIjiHbnIjeM5Lp+pMrSeN+LCMsEwOrkPmWm03A=
242+
github.com/multiformats/go-multicodec v0.3.0/go.mod h1:qGGaQmioCDh+TeFOnxrbU0DaIPw8yFgAZgFG0V7p1qQ=
240243
github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U=
241244
github.com/multiformats/go-multihash v0.0.5/go.mod h1:lt/HCbqlQwlPBz7lv0sQCdtfcMtlJvakRUn/0Ual8po=
242245
github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew=
@@ -279,6 +282,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
279282
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
280283
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
281284
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
285+
github.com/warpfork/go-testmark v0.3.0 h1:Q81c4u7hT+BR5kNfNQhEF0VT2pmL7+Kk0wD+ORYl7iA=
286+
github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0=
282287
github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw=
283288
github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a h1:G++j5e0OC488te356JvdhaM8YS6nMsjLAYF7JxCv07w=
284289
github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw=

options.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package car
2+
3+
import "math"
4+
5+
// options holds the configured options after applying a number of
6+
// Option funcs.
7+
type options struct {
8+
TraverseLinksOnlyOnce bool
9+
MaxTraversalLinks uint64
10+
}
11+
12+
// Option describes an option which affects behavior when
13+
// interacting with the interface.
14+
type Option func(*options)
15+
16+
// TraverseLinksOnlyOnce prevents the traversal engine from visiting
17+
// the same links more than once.
18+
//
19+
// This can be an efficient strategy for an exhaustive selector where it's known
20+
// that repeat visits won't impact the completeness of execution. However it
21+
// should be used with caution with most other selectors as repeat visits of
22+
// links for different reasons during selector execution can be valid and
23+
// necessary to perform full traversal.
24+
func TraverseLinksOnlyOnce() Option {
25+
return func(sco *options) {
26+
sco.TraverseLinksOnlyOnce = true
27+
}
28+
}
29+
30+
// MaxTraversalLinks changes the allowed number of links a selector traversal
31+
// can execute before failing.
32+
//
33+
// Note that setting this option may cause an error to be returned from selector
34+
// execution when building a SelectiveCar.
35+
func MaxTraversalLinks(MaxTraversalLinks uint64) Option {
36+
return func(sco *options) {
37+
sco.MaxTraversalLinks = MaxTraversalLinks
38+
}
39+
}
40+
41+
// applyOptions applies given opts and returns the resulting options.
42+
func applyOptions(opt ...Option) options {
43+
opts := options{
44+
TraverseLinksOnlyOnce: false, // default: recurse until exhausted
45+
MaxTraversalLinks: math.MaxInt64, // default: traverse all
46+
}
47+
for _, o := range opt {
48+
o(&opts)
49+
}
50+
return opts
51+
}

options_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package car
2+
3+
import (
4+
"math"
5+
"testing"
6+
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestApplyOptions_SetsExpectedDefaults(t *testing.T) {
11+
require.Equal(t, options{
12+
MaxTraversalLinks: math.MaxInt64,
13+
TraverseLinksOnlyOnce: false,
14+
}, applyOptions())
15+
}
16+
17+
func TestApplyOptions_AppliesOptions(t *testing.T) {
18+
require.Equal(t,
19+
options{
20+
MaxTraversalLinks: 123,
21+
TraverseLinksOnlyOnce: true,
22+
},
23+
applyOptions(
24+
MaxTraversalLinks(123),
25+
TraverseLinksOnlyOnce(),
26+
))
27+
}

selectivecar.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"io"
9+
"math"
910

1011
cid "github.com/ipfs/go-cid"
1112
util "github.com/ipld/go-car/util"
@@ -40,6 +41,7 @@ type SelectiveCar struct {
4041
ctx context.Context
4142
dags []Dag
4243
store ReadStore
44+
opts options
4345
}
4446

4547
// OnCarHeaderFunc is called during traversal when the header is created
@@ -61,16 +63,16 @@ type SelectiveCarPrepared struct {
6163

6264
// NewSelectiveCar creates a new SelectiveCar for the given car file based on
6365
// a block store and set of root+selector pairs
64-
func NewSelectiveCar(ctx context.Context, store ReadStore, dags []Dag) SelectiveCar {
66+
func NewSelectiveCar(ctx context.Context, store ReadStore, dags []Dag, opts ...Option) SelectiveCar {
6567
return SelectiveCar{
6668
ctx: ctx,
6769
store: store,
6870
dags: dags,
71+
opts: applyOptions(opts...),
6972
}
7073
}
7174

7275
func (sc SelectiveCar) traverse(onCarHeader OnCarHeaderFunc, onNewCarBlock OnNewCarBlockFunc) (uint64, error) {
73-
7476
traverser := &selectiveCarTraverser{onCarHeader, onNewCarBlock, 0, cid.NewSet(), sc, cidlink.DefaultLinkSystem()}
7577
traverser.lsys.StorageReadOpener = traverser.loader
7678
return traverser.traverse()
@@ -264,13 +266,21 @@ func (sct *selectiveCarTraverser) traverseBlocks() error {
264266
if err != nil {
265267
return err
266268
}
267-
err = traversal.Progress{
269+
prog := traversal.Progress{
268270
Cfg: &traversal.Config{
269271
Ctx: sct.sc.ctx,
270272
LinkSystem: sct.lsys,
271273
LinkTargetNodePrototypeChooser: nsc,
274+
LinkVisitOnlyOnce: sct.sc.opts.TraverseLinksOnlyOnce,
272275
},
273-
}.WalkAdv(nd, parsed, func(traversal.Progress, ipld.Node, traversal.VisitReason) error { return nil })
276+
}
277+
if sct.sc.opts.MaxTraversalLinks < math.MaxInt64 {
278+
prog.Budget = &traversal.Budget{
279+
NodeBudget: math.MaxInt64,
280+
LinkBudget: int64(sct.sc.opts.MaxTraversalLinks),
281+
}
282+
}
283+
err = prog.WalkAdv(nd, parsed, func(traversal.Progress, ipld.Node, traversal.VisitReason) error { return nil })
274284
if err != nil {
275285
return err
276286
}

0 commit comments

Comments
 (0)