From adbcfe1cd9b4933fccd61928ee227011b5142392 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Fri, 6 Dec 2024 15:28:57 +0530 Subject: [PATCH 1/5] fieldDict to support cardinality API --- index.go | 1 + 1 file changed, 1 insertion(+) diff --git a/index.go b/index.go index 4d8ecd3..f792da5 100644 --- a/index.go +++ b/index.go @@ -240,6 +240,7 @@ type FieldDict interface { Next() (*DictEntry, error) Close() error + Cardinality() int BytesRead() uint64 } From c8b43f20a1dc1a08acfdda7937fe93506ad73044 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 11 Dec 2024 12:16:35 +0530 Subject: [PATCH 2/5] defining similarity models for different scoring mechanisms --- indexing_options.go | 17 +++++++++++++++++ vector.go | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/indexing_options.go b/indexing_options.go index 9724cca..b25204d 100644 --- a/indexing_options.go +++ b/indexing_options.go @@ -24,6 +24,23 @@ const ( SkipFreqNorm ) +const ( + BM25Similarity = "bm25" + + TFIDFSimilarity = "tfidf" +) + +// similarity model just means the scoring mechanism used to rank documents fetched +// for a query performed on a field. Default is the bm25 scoring but can be set +// to other models like the legacy tf-idf. +const DefaultSimilarityModel = BM25Similarity + +// Supported similarity models +var SupportedSimilarityModels = map[string]struct{}{ + BM25Similarity: {}, + TFIDFSimilarity: {}, +} + func (o FieldIndexingOptions) IsIndexed() bool { return o&IndexField != 0 } diff --git a/vector.go b/vector.go index c1b5837..1057cf9 100644 --- a/vector.go +++ b/vector.go @@ -37,10 +37,10 @@ const ( CosineSimilarity = "cosine" ) -const DefaultSimilarityMetric = EuclideanDistance +const DefaultVectorSimilarityMetric = EuclideanDistance // Supported similarity metrics for vector fields -var SupportedSimilarityMetrics = map[string]struct{}{ +var SupportedVectorSimilarityMetrics = map[string]struct{}{ EuclideanDistance: {}, InnerProduct: {}, CosineSimilarity: {}, From 4444f3b96969a5334c83df317641a8d31937e5f5 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Tue, 7 Jan 2025 17:54:52 +0530 Subject: [PATCH 3/5] naming changes --- indexing_options.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/indexing_options.go b/indexing_options.go index b25204d..9002399 100644 --- a/indexing_options.go +++ b/indexing_options.go @@ -25,20 +25,19 @@ const ( ) const ( - BM25Similarity = "bm25" - - TFIDFSimilarity = "tfidf" + BM25Scoring = "bm25" + TFIDFScoring = "tfidf" ) -// similarity model just means the scoring mechanism used to rank documents fetched +// Scoring model indicates the algorithm used to rank documents fetched // for a query performed on a field. Default is the bm25 scoring but can be set // to other models like the legacy tf-idf. -const DefaultSimilarityModel = BM25Similarity +const DefaultScoringModel = TFIDFScoring // Supported similarity models -var SupportedSimilarityModels = map[string]struct{}{ - BM25Similarity: {}, - TFIDFSimilarity: {}, +var SupportedScoringModels = map[string]struct{}{ + BM25Scoring: {}, + TFIDFScoring: {}, } func (o FieldIndexingOptions) IsIndexed() bool { From 9e6aeb35180b59b888bb92fe20493a4ce9208c19 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 7 Jan 2025 08:03:05 -0700 Subject: [PATCH 4/5] Update comment --- indexing_options.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/indexing_options.go b/indexing_options.go index 9002399..a587ded 100644 --- a/indexing_options.go +++ b/indexing_options.go @@ -30,8 +30,7 @@ const ( ) // Scoring model indicates the algorithm used to rank documents fetched -// for a query performed on a field. Default is the bm25 scoring but can be set -// to other models like the legacy tf-idf. +// for a query performed on a text field. const DefaultScoringModel = TFIDFScoring // Supported similarity models From 243ba7ec9fbd4d94fe6656d90f304358a4da079b Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 7 Jan 2025 08:05:21 -0700 Subject: [PATCH 5/5] Update workflows --- .github/workflows/tests.yml | 2 +- go.mod | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bc81391..496c91e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: test: strategy: matrix: - go-version: [1.19.x, 1.20.x, 1.21.x] + go-version: [1.20.x, 1.21.x, 1.22.x] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: diff --git a/go.mod b/go.mod index 26a78c4..8c0a530 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/blevesearch/bleve_index_api -go 1.20 +go 1.21