Skip to content

Commit

Permalink
hacky start
Browse files Browse the repository at this point in the history
  • Loading branch information
metonymic-smokey committed Dec 5, 2024
1 parent 4d4440e commit 1a8b62b
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 2 deletions.
29 changes: 27 additions & 2 deletions index_alias_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
flags := &preSearchFlags{
knn: requestHasKNN(req),
synonyms: !isMatchNoneQuery(req.Query),
bm25: true, // TODO Just force setting it to true to test
}
return preSearchDataSearch(ctx, req, flags, i.indexes...)
}
Expand Down Expand Up @@ -539,32 +540,40 @@ type asyncSearchResult struct {
type preSearchFlags struct {
knn bool
synonyms bool
bm25 bool // needs presearch for this too
}

// preSearchRequired checks if preSearch is required and returns a boolean flag
// It only allocates the preSearchFlags struct if necessary
func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags {
// Check for KNN query
knn := requestHasKNN(req)
var synonyms bool
var synonyms, bm25 bool
if !isMatchNoneQuery(req.Query) {
// Check if synonyms are defined in the mapping
if sm, ok := m.(mapping.SynonymMapping); ok && sm.SynonymCount() > 0 {
synonyms = true
}

// todo fix this cuRRENTLY ALL INDEX mappings are BM25 mappings, need to fix
// this is just a placeholder.
if _, ok := m.(mapping.BM25Mapping); ok {
bm25 = true
}
}
if knn || synonyms {
return &preSearchFlags{
knn: knn,
synonyms: synonyms,
bm25: bm25,
}
}
return nil
}

func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
var dummyQuery = req.Query
if !flags.synonyms {
if !flags.synonyms && !flags.bm25 {
// create a dummy request with a match none query
// since we only care about the preSearchData in PreSearch
dummyQuery = query.NewMatchNoneQuery()
Expand Down Expand Up @@ -646,6 +655,13 @@ func constructSynonymPreSearchData(rv map[string]map[string]interface{}, sr *Sea
return rv
}

func constructBM25PreSearchData(rv map[string]map[string]interface{}, sr *SearchResult, indexes []Index) map[string]map[string]interface{} {
for _, index := range indexes {
rv[index.Name()][search.BM25PreSearchDataKey] = sr.totalDocCount
}
return rv
}

func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
mergedOut := make(map[string]map[string]interface{}, len(indexes))
Expand All @@ -662,6 +678,9 @@ func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
if flags.synonyms {
mergedOut = constructSynonymPreSearchData(mergedOut, preSearchResult, indexes)
}
if flags.bm25 {
mergedOut = constructBM25PreSearchData(mergedOut, preSearchResult, indexes)
}
return mergedOut, nil
}

Expand Down Expand Up @@ -691,6 +710,12 @@ func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]
rv[index.Name()][search.SynonymPreSearchDataKey] = fts
}
}
// TODO Extend to more stats
if totalDocCount, ok := req.PreSearchData[search.BM25PreSearchDataKey].(uint64); ok {
for _, index := range indexes {
rv[index.Name()][search.BM25PreSearchDataKey] = totalDocCount
}
}
return rv, nil
}

Expand Down
21 changes: 21 additions & 0 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
}

var fts search.FieldTermSynonymMap
var count uint64

if !isMatchNoneQuery(req.Query) {
if synMap, ok := i.m.(mapping.SynonymMapping); ok {
if synReader, ok := reader.(index.SynonymReader); ok {
Expand All @@ -460,6 +462,12 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
}
}
}
if _, ok := i.m.(mapping.BM25Mapping); ok {
count, err = reader.DocCount()
if err != nil {
return nil, err
}
}
}

return &SearchResult{
Expand All @@ -469,6 +477,7 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
},
Hits: knnHits,
SynonymResult: fts,
totalDocCount: count,
}, nil
}

Expand Down Expand Up @@ -520,6 +529,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr

var knnHits []*search.DocumentMatch
var fts search.FieldTermSynonymMap
var bm25TotalDocs uint64
var ok bool
if req.PreSearchData != nil {
for k, v := range req.PreSearchData {
Expand All @@ -538,6 +548,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, fmt.Errorf("synonym preSearchData must be of type search.FieldTermSynonymMap")
}
}
case search.BM25PreSearchDataKey:
if v != nil {
bm25TotalDocs, ok = v.(uint64)
if !ok {
return nil, fmt.Errorf("bm25 preSearchData must be of type uint64")
}
}
}
}
}
Expand Down Expand Up @@ -565,6 +582,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts)
}

if bm25TotalDocs > 0 {
ctx = context.WithValue(ctx, search.BM25MapKey, bm25TotalDocs)
}

// This callback and variable handles the tracking of bytes read
// 1. as part of creation of tfr and its Next() calls which is
// accounted by invoking this callback when the TFR is closed.
Expand Down
4 changes: 4 additions & 0 deletions mapping/mapping.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,7 @@ type SynonymMapping interface {

SynonymCount() int
}

type BM25Mapping interface {
IndexMapping
}
23 changes: 23 additions & 0 deletions pre_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,24 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) {
}
}

// -----------------------------------------------------------------------------
type bm25PreSearchResultProcessor struct {
docCount uint64 // bm25 specific stats
}

func newBM25PreSearchResultProcessor() *bm25PreSearchResultProcessor {
return &bm25PreSearchResultProcessor{}
}

// TODO How will this work for queries other than term queries?
func (b *bm25PreSearchResultProcessor) add(sr *SearchResult, indexName string) {
b.docCount += (sr.totalDocCount)
}

func (b *bm25PreSearchResultProcessor) finalize(sr *SearchResult) {

}

// -----------------------------------------------------------------------------
// Synonym preSearchResultProcessor for handling Synonym presearch results
type synonymPreSearchResultProcessor struct {
Expand Down Expand Up @@ -118,6 +136,11 @@ func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) p
processors = append(processors, synonymProcessor)
}
}
if flags.bm25 {
if bm25Processtor := newBM25PreSearchResultProcessor(); bm25Processtor != nil {
processors = append(processors, bm25Processtor)
}
}
// Return based on the number of processors, optimizing for the common case of 1 processor
// If there are no processors, return nil
switch len(processors) {
Expand Down
4 changes: 4 additions & 0 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,10 @@ type SearchResult struct {
// special fields that are applicable only for search
// results that are obtained from a presearch
SynonymResult search.FieldTermSynonymMap `json:"synonym_result,omitempty"`

// The following fields are applicable to BM25 preSearch
// todo add more fields beyond docCount
totalDocCount uint64
}

func (sr *SearchResult) Size() int {
Expand Down
1 change: 1 addition & 0 deletions search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, te

func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader,
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
// TODO Instead of passing count from reader here, do it using the presearch phase stats.
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
Expand Down
2 changes: 2 additions & 0 deletions search/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool

const KnnPreSearchDataKey = "_knn_pre_search_data_key"
const SynonymPreSearchDataKey = "_synonym_pre_search_data_key"
const BM25PreSearchDataKey = "_bm25_pre_search_data_key"

const PreSearchKey = "_presearch_key"

Expand All @@ -162,6 +163,7 @@ func (f *FieldTermSynonymMap) MergeWith(fts FieldTermSynonymMap) {
}

const FieldTermSynonymMapKey = "_field_term_synonym_map_key"
const BM25MapKey = "_bm25_map_key"

const SearcherStartCallbackKey = "_searcher_start_callback_key"
const SearcherEndCallbackKey = "_searcher_end_callback_key"

0 comments on commit 1a8b62b

Please sign in to comment.