From b9d9c3b0f8e8230c6aa6fc56334484ee44743655 Mon Sep 17 00:00:00 2001 From: Aditi Ahuja <48997495+metonymic-smokey@users.noreply.github.com> Date: Wed, 12 Jul 2023 08:57:59 +0530 Subject: [PATCH] Avoid double counting when sorting by field for faceted query (#1836) MB-57228: Deduplicate needed fields --- search/collector/topn.go | 15 +++++++- search/collector/topn_test.go | 44 ++++++++++++++++++++++ search_test.go | 71 +++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) diff --git a/search/collector/topn.go b/search/collector/topn.go index 808f396d0..270d5f924 100644 --- a/search/collector/topn.go +++ b/search/collector/topn.go @@ -371,7 +371,20 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc // SetFacetsBuilder registers a facet builder for this collector func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { hc.facetsBuilder = facetsBuilder - hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...) + fieldsRequiredForFaceting := facetsBuilder.RequiredFields() + // for each of these fields, append only if not already there in hc.neededFields. + for _, field := range fieldsRequiredForFaceting { + found := false + for _, neededField := range hc.neededFields { + if field == neededField { + found = true + break + } + } + if !found { + hc.neededFields = append(hc.neededFields, field) + } + } } // finalizeResults starts with the heap containing the final top size+skip diff --git a/search/collector/topn_test.go b/search/collector/topn_test.go index 9351955d0..31aef6339 100644 --- a/search/collector/topn_test.go +++ b/search/collector/topn_test.go @@ -19,7 +19,9 @@ import ( "context" "testing" + "github.com/blevesearch/bleve/v2/index/scorch" "github.com/blevesearch/bleve/v2/search" + "github.com/blevesearch/bleve/v2/search/facet" index "github.com/blevesearch/bleve_index_api" ) @@ -724,6 +726,48 @@ func TestCollectorChaining(t *testing.T) { } } +func setupIndex(t *testing.T) index.Index { + analysisQueue := index.NewAnalysisQueue(1) + i, err := scorch.NewScorch( + scorch.Name, + map[string]interface{}{ + "path": "", + }, + analysisQueue) + if err != nil { + t.Fatal(err) + } + err = i.Open() + if err != nil { + t.Fatal(err) + } + + return i +} + +func TestSetFacetsBuilder(t *testing.T) { + // Field common to both sorting and faceting. + sortFacetsField := "locations" + + coll := NewTopNCollector(10, 0, search.SortOrder{&search.SortField{Field: sortFacetsField}}) + + i := setupIndex(t) + indexReader, err := i.Reader() + if err != nil { + t.Fatal(err) + } + + fb := search.NewFacetsBuilder(indexReader) + facetBuilder := facet.NewTermsFacetBuilder(sortFacetsField, 100) + fb.Add("locations_facet", facetBuilder) + coll.SetFacetsBuilder(fb) + + // Should not duplicate the "locations" field in the collector. + if len(coll.neededFields) != 1 || coll.neededFields[0] != sortFacetsField { + t.Errorf("expected fields in collector: %v, observed: %v", []string{sortFacetsField}, coll.neededFields) + } +} + func BenchmarkTop10of0Scores(b *testing.B) { benchHelper(0, func() search.Collector { return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) diff --git a/search_test.go b/search_test.go index a3191f8f3..c8330bfea 100644 --- a/search_test.go +++ b/search_test.go @@ -46,6 +46,77 @@ import ( index "github.com/blevesearch/bleve_index_api" ) +func TestSortedFacetedQuery(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMapping := NewIndexMapping() + indexMapping.TypeField = "type" + indexMapping.DefaultAnalyzer = "en" + documentMapping := NewDocumentMapping() + indexMapping.AddDocumentMapping("hotel", documentMapping) + + contentFieldMapping := NewTextFieldMapping() + contentFieldMapping.Index = true + contentFieldMapping.DocValues = true + documentMapping.AddFieldMappingsAt("content", contentFieldMapping) + documentMapping.AddFieldMappingsAt("country", contentFieldMapping) + + index, err := New(tmpIndexPath, indexMapping) + if err != nil { + t.Fatal(err) + } + defer func() { + err := index.Close() + if err != nil { + t.Fatal(err) + } + }() + + index.Index("1", map[string]interface{}{ + "country": "india", + "content": "k", + }) + index.Index("2", map[string]interface{}{ + "country": "india", + "content": "l", + }) + index.Index("3", map[string]interface{}{ + "country": "india", + "content": "k", + }) + + d, err := index.DocCount() + if err != nil { + t.Fatal(err) + } + if d != 3 { + t.Errorf("expected 3, got %d", d) + } + + query := NewMatchPhraseQuery("india") + query.SetField("country") + searchRequest := NewSearchRequest(query) + searchRequest.SortBy([]string{"content"}) + fr := NewFacetRequest("content", 100) + searchRequest.AddFacet("content_facet", fr) + + searchResults, err := index.Search(searchRequest) + if err != nil { + t.Fatal(err) + } + + expectedResults := map[string]int{"k": 2, "l": 1} + + for _, v := range searchResults.Facets { + for _, v1 := range v.Terms.Terms() { + if v1.Count != expectedResults[v1.Term] { + t.Errorf("expected %d, got %d", expectedResults[v1.Term], v1.Count) + } + } + } +} + func TestSearchResultString(t *testing.T) { tests := []struct {