Skip to content

Commit 74cb8d1

Browse files
MB-70410: Simplify CoalesceQueue in hierarchical nested search (#2283)
- Simplifies the `CoalesceQueue` used in nested conjunction search by moving coalescing from enqueue to dequeue. This removes the separate map and reduces memory use while keeping behavior the same.
1 parent 9d07cde commit 74cb8d1

File tree

1 file changed

+32
-51
lines changed

1 file changed

+32
-51
lines changed

search/searcher/search_conjunction_nested.go

Lines changed: 32 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ func (s *NestedConjunctionSearcher) Next(ctx *search.SearchContext) (*search.Doc
199199
}
200200
// check if the docQueue has any buffered matches
201201
if s.docQueue.Len() > 0 {
202-
return s.docQueue.Dequeue()
202+
return s.docQueue.Dequeue(ctx), nil
203203
}
204204
// now enter the main alignment loop
205205
n := len(s.searchers)
@@ -271,17 +271,11 @@ OUTER:
271271
// now we need to buffer all the intermediate matches for every
272272
// searcher at this key, until either the searcher's key changes
273273
// or the searcher is exhausted
274+
var err error
274275
for i := 0; i < n; i++ {
275276
for {
276277
// buffer the current match
277-
recycle, err := s.docQueue.Enqueue(s.currs[i])
278-
if err != nil {
279-
return nil, err
280-
}
281-
if recycle != nil {
282-
// we got a match to recycle
283-
ctx.DocumentMatchPool.Put(recycle)
284-
}
278+
s.docQueue.Enqueue(s.currs[i])
285279
// advance to next match
286280
s.currs[i], err = s.searchers[i].Next(ctx)
287281
if err != nil {
@@ -308,7 +302,7 @@ OUTER:
308302
// finalize the docQueue for dequeueing
309303
s.docQueue.Finalize()
310304
// finally return the first buffered match
311-
return s.docQueue.Dequeue()
305+
return s.docQueue.Dequeue(ctx), nil
312306
}
313307
}
314308

@@ -341,10 +335,7 @@ func (s *NestedConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.
341335
// first check if the docQueue has any buffered matches
342336
// if so we first check if any of them can satisfy the Advance(ID)
343337
for s.docQueue.Len() > 0 {
344-
dm, err := s.docQueue.Dequeue()
345-
if err != nil {
346-
return nil, err
347-
}
338+
dm := s.docQueue.Dequeue(ctx)
348339
if dm.IndexInternalID.Compare(ID) >= 0 {
349340
return dm, nil
350341
}
@@ -421,45 +412,21 @@ func (s *NestedConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.
421412

422413
// ------------------------------------------------------------------------------------------
423414
type CoalesceQueue struct {
424-
order []*search.DocumentMatch // queue of DocumentMatch
425-
items map[uint64]*search.DocumentMatch // map of ID to DocumentMatch
415+
order []*search.DocumentMatch // queue of DocumentMatch
426416
}
427417

428418
func NewCoalesceQueue() *CoalesceQueue {
429419
cq := &CoalesceQueue{
430420
order: make([]*search.DocumentMatch, 0),
431-
items: make(map[uint64]*search.DocumentMatch),
432421
}
433422
return cq
434423
}
435424

436-
// Enqueue adds the given DocumentMatch to the queue. If a DocumentMatch with the same
437-
// IndexInternalID already exists in the queue, it merges the scores and explanations,
438-
// and returns the given DocumentMatch for recycling. If it's a new entry, it adds it
439-
// to the queue and returns nil.
440-
func (cq *CoalesceQueue) Enqueue(it *search.DocumentMatch) (*search.DocumentMatch, error) {
441-
val, err := it.IndexInternalID.Value()
442-
if err != nil {
443-
// cannot coalesce without a valid uint64 ID
444-
return nil, err
445-
}
446-
447-
if existing, ok := cq.items[val]; ok {
448-
// merge with current version
449-
existing.Score += it.Score
450-
existing.Expl = existing.Expl.MergeWith(it.Expl)
451-
existing.FieldTermLocations = search.MergeFieldTermLocationsFromMatch(
452-
existing.FieldTermLocations, it)
453-
// return it to caller for recycling
454-
return it, nil
455-
}
456-
457-
// first time we see this ID — enqueue
458-
cq.items[val] = it
425+
// Enqueue appends the given DocumentMatch to the queue. Coalescing of duplicates
426+
// is deferred until Dequeue, after Finalize has sorted items by IndexInternalID.
427+
func (cq *CoalesceQueue) Enqueue(it *search.DocumentMatch) {
459428
// append to order slice (this is a stack)
460429
cq.order = append(cq.order, it)
461-
// no recycling needed as we added a new item
462-
return nil, nil
463430
}
464431

465432
// Finalize prepares the queue for dequeue operations by sorting the items based on
@@ -473,24 +440,38 @@ func (cq *CoalesceQueue) Finalize() {
473440
})
474441
}
475442

476-
// Dequeue removes and returns the next DocumentMatch from the queue in sorted order.
477-
// If the queue is empty, it returns nil.
478-
func (cq *CoalesceQueue) Dequeue() (*search.DocumentMatch, error) {
443+
// Dequeue removes and returns the next DocumentMatch in sorted order, merging any
444+
// consecutive duplicates. Merged items are recycled via ctx.DocumentMatchPool.
445+
// Returns nil when the queue is empty.
446+
func (cq *CoalesceQueue) Dequeue(ctx *search.SearchContext) *search.DocumentMatch {
479447
if cq.Len() == 0 {
480-
return nil, nil
448+
return nil
481449
}
482450

483451
// pop from end of slice
484452
rv := cq.order[len(cq.order)-1]
485453
cq.order = cq.order[:len(cq.order)-1]
486454

487-
val, err := rv.IndexInternalID.Value()
488-
if err != nil {
489-
return nil, err
455+
// merge duplicates
456+
for cq.Len() > 0 {
457+
// peek at next item
458+
next := cq.order[len(cq.order)-1]
459+
if !rv.IndexInternalID.Equals(next.IndexInternalID) {
460+
// different ID, stop merging
461+
break
462+
}
463+
// pop the next item
464+
cq.order = cq.order[:len(cq.order)-1]
465+
// same ID, merge
466+
rv.Score += next.Score
467+
rv.Expl = rv.Expl.MergeWith(next.Expl)
468+
rv.FieldTermLocations = search.MergeFieldTermLocationsFromMatch(
469+
rv.FieldTermLocations, next)
470+
// recycle the merged item
471+
ctx.DocumentMatchPool.Put(next)
490472
}
491473

492-
delete(cq.items, val)
493-
return rv, nil
474+
return rv
494475
}
495476

496477
// Len returns the number of DocumentMatch items currently in the queue.

0 commit comments

Comments
 (0)