Skip to content

Commit

Permalink
MB-62230 - Pre-filtering Optimisation (#2098)
Browse files Browse the repository at this point in the history
This PR:
1. Avoids creating document matches during the pre-filter phase, where the
internal document IDs alone suffice.
2. Reuses document matches by returning each one to the document match pool
after its hit has been processed.

---------

Co-authored-by: Abhinav Dangeti <[email protected]>
  • Loading branch information
metonymic-smokey and abhinavdangeti authored Nov 13, 2024
1 parent 902051d commit e72f7c2
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 53 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ require (
github.com/blevesearch/zapx/v13 v13.3.10
github.com/blevesearch/zapx/v14 v14.3.10
github.com/blevesearch/zapx/v15 v15.3.16
github.com/blevesearch/zapx/v16 v16.1.8-0.20241104164502-f19d5f0cdbcb
github.com/blevesearch/zapx/v16 v16.1.8
github.com/couchbase/moss v0.2.0
github.com/golang/protobuf v1.3.2
github.com/spf13/cobra v1.7.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE=
github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.1.8-0.20241104164502-f19d5f0cdbcb h1:+LkKIOe8vnyxmHLI8iOa8vpv9h46qYait5znwcl7Utg=
github.com/blevesearch/zapx/v16 v16.1.8-0.20241104164502-f19d5f0cdbcb/go.mod h1:JqQlOqlRVaYDkpLIl3JnKql8u4zKTNlVEa3nLsi0Gn8=
github.com/blevesearch/zapx/v16 v16.1.8 h1:Bxzpw6YQpFs7UjoCV1+RvDw6fmAT2GZxldwX8b3wVBM=
github.com/blevesearch/zapx/v16 v16.1.8/go.mod h1:JqQlOqlRVaYDkpLIl3JnKql8u4zKTNlVEa3nLsi0Gn8=
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
Expand Down
40 changes: 15 additions & 25 deletions search/collector/eligible.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type EligibleCollector struct {
took time.Duration
results search.DocumentMatchCollection

store collectorStore
ids []index.IndexInternalID
}

func NewEligibleCollector(size int) *EligibleCollector {
Expand All @@ -38,13 +38,9 @@ func NewEligibleCollector(size int) *EligibleCollector {

func newEligibleCollector(size int) *EligibleCollector {
// No sort order & skip always 0 since this is only to filter eligible docs.
ec := &EligibleCollector{size: size}

// comparator is a dummy here
ec.store = getOptimalCollectorStore(size, 0, func(i, j *search.DocumentMatch) int {
return 0
})

ec := &EligibleCollector{size: size,
ids: make([]index.IndexInternalID, 0, size),
}
return ec
}

Expand All @@ -55,8 +51,13 @@ func makeEligibleDocumentMatchHandler(ctx *search.SearchContext) (search.Documen
return nil
}

// No elements removed from the store here.
_ = ec.store.Add(d)
copyOfID := make([]byte, len(d.IndexInternalID))
copy(copyOfID, d.IndexInternalID)
ec.ids = append(ec.ids, copyOfID)

// recycle the DocumentMatch
ctx.DocumentMatchPool.Put(d)

return nil
}, nil
}
Expand Down Expand Up @@ -122,26 +123,15 @@ func (ec *EligibleCollector) Collect(ctx context.Context, searcher search.Search
// compute search duration
ec.took = time.Since(startTime)

// finalize actual results
err = ec.finalizeResults(reader)
if err != nil {
return err
}
return nil
}

func (ec *EligibleCollector) finalizeResults(r index.IndexReader) error {
var err error
ec.results, err = ec.store.Final(0, func(doc *search.DocumentMatch) error {
// Adding the results to the store without any modifications since we don't
// require the external ID of the filtered hits.
return nil
})
return err
// Results always returns nil. This collector records only internal
// document IDs (see IDs), so there is no DocumentMatchCollection to return.
func (ec *EligibleCollector) Results() search.DocumentMatchCollection {
return nil
}

func (ec *EligibleCollector) Results() search.DocumentMatchCollection {
return ec.results
// IDs returns the internal document IDs gathered by the collector. The
// returned slice is ec.ids itself, not a copy.
func (ec *EligibleCollector) IDs() []index.IndexInternalID {
return ec.ids
}

func (ec *EligibleCollector) Total() uint64 {
Expand Down
5 changes: 0 additions & 5 deletions search/collector/heap.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
return rv
}

// Add hands doc to c.add without checking any size limit here.
// It always returns nil.
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) *search.DocumentMatch {
c.add(doc)
return nil
}

func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
Expand Down
5 changes: 0 additions & 5 deletions search/collector/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
return rv
}

// Add pushes doc onto the back of c.results and always returns nil.
// The size parameter is accepted but never used.
func (c *collectStoreList) Add(doc *search.DocumentMatch, size int) *search.DocumentMatch {
c.results.PushBack(doc)
return nil
}

func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
Expand Down
9 changes: 3 additions & 6 deletions search/collector/slice.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

package collector

import "github.com/blevesearch/bleve/v2/search"
import (
"github.com/blevesearch/bleve/v2/search"
)

type collectStoreSlice struct {
slice search.DocumentMatchCollection
Expand All @@ -29,11 +31,6 @@ func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
return rv
}

// Add appends doc to the end of c.slice without checking any size limit.
// It always returns nil.
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) *search.DocumentMatch {
c.slice = append(c.slice, doc)
return nil
}

func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
Expand Down
4 changes: 0 additions & 4 deletions search/collector/topn.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ func init() {
}

type collectorStore interface {
// Adds a doc to the store without considering size.
// Returns nil if the doc was added successfully.
Add(doc *search.DocumentMatch) *search.DocumentMatch

// Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been
// exceeded, nil is returned.
Expand Down
7 changes: 2 additions & 5 deletions search_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,12 +404,9 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
if err != nil {
return nil, err
}
filterHits := filterColl.Results()
filterHits := filterColl.IDs()
if len(filterHits) > 0 {
filterHitsMap[idx] = make([]index.IndexInternalID, len(filterHits))
for i, docMatch := range filterHits {
filterHitsMap[idx][i] = docMatch.IndexInternalID
}
filterHitsMap[idx] = filterHits
}
// set requiresFiltering regardless of whether there're filtered hits or
// not to later decide whether to consider the knnQuery or not
Expand Down

0 comments on commit e72f7c2

Please sign in to comment.