From 8cd83bfd2b5089f88efb49ebf8ede40554b71180 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sun, 2 Feb 2025 10:32:59 -0800 Subject: [PATCH] Extend posting-group-max-key-series-ratio for add all posting group (#8083) Signed-off-by: yeya24 --- pkg/store/bucket.go | 4 +-- pkg/store/lazy_postings.go | 14 +++++----- pkg/store/lazy_postings_test.go | 45 ++++++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index 359b8e57be..7e23a388d1 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -585,7 +585,7 @@ func WithLazyExpandedPostings(enabled bool) BucketStoreOption { } } -// WithPostingGroupMaxKeySeriesRatio configures a threshold to mark a posting group as lazy if it has more add keys. +// WithPostingGroupMaxKeySeriesRatio configures a threshold to mark a posting group as lazy if it has more add keys or remove keys. func WithPostingGroupMaxKeySeriesRatio(postingGroupMaxKeySeriesRatio float64) BucketStoreOption { return func(s *BucketStore) { s.postingGroupMaxKeySeriesRatio = postingGroupMaxKeySeriesRatio @@ -1076,7 +1076,7 @@ type blockSeriesClient struct { lazyExpandedPostingEnabled bool seriesMatchRatio float64 - // Mark posting group as lazy if it adds too many keys. 0 to disable. + // Mark posting group as lazy if it adds or removes too many keys. 0 to disable. postingGroupMaxKeySeriesRatio float64 lazyExpandedPostingsCount prometheus.Counter lazyExpandedPostingGroupByReason *prometheus.CounterVec diff --git a/pkg/store/lazy_postings.go b/pkg/store/lazy_postings.go index 325e92e904..2d6eea6062 100644 --- a/pkg/store/lazy_postings.go +++ b/pkg/store/lazy_postings.go @@ -166,6 +166,13 @@ func optimizePostingsFetchByDownloadedBytes( if seriesMatched <= 0 { break } + // Only mark posting group as lazy due to too many keys when those keys are known to be existent. + if postingGroupMaxKeySeriesRatio > 0 && maxSeriesMatched > 0 && + float64(pg.existentKeys)/float64(maxSeriesMatched) > postingGroupMaxKeySeriesRatio { + markPostingGroupLazy(pg, "keys_limit", lazyExpandedPostingSizeBytes, lazyExpandedPostingGroupsByReason) + i++ + continue + } if pg.addAll { // For posting group that has negative matchers, we assume we can underfetch // min(pg.cardinality, current_series_matched) * match ratio series. @@ -177,13 +184,6 @@ func optimizePostingsFetchByDownloadedBytes( seriesMatched -= underfetchedSeries underfetchedSeriesSize = underfetchedSeries * seriesMaxSize } else { - // Only mark posting group as lazy due to too many keys when those keys are known to be existent. - if postingGroupMaxKeySeriesRatio > 0 && maxSeriesMatched > 0 && - float64(pg.existentKeys)/float64(maxSeriesMatched) > postingGroupMaxKeySeriesRatio { - markPostingGroupLazy(pg, "keys_limit", lazyExpandedPostingSizeBytes, lazyExpandedPostingGroupsByReason) - i++ - continue - } underfetchedSeriesSize = seriesMaxSize * int64(math.Ceil(float64(seriesMatched)*(1-seriesMatchRatio))) seriesMatched = int64(math.Ceil(float64(seriesMatched) * seriesMatchRatio)) } diff --git a/pkg/store/lazy_postings_test.go b/pkg/store/lazy_postings_test.go index 3b61d4be08..940cb98902 100644 --- a/pkg/store/lazy_postings_test.go +++ b/pkg/store/lazy_postings_test.go @@ -530,39 +530,60 @@ func TestOptimizePostingsFetchByDownloadedBytes(t *testing.T) { }, }, { - name: "two posting groups with remove keys, minAddKeysToMarkLazy won't be applied", + name: "one group with add key and one group with remove keys, posting group with remove keys marked as lazy due to exceeding postingGroupMaxKeySeriesRatio", inputPostings: map[string]map[string]index.Range{ "foo": {"bar": index.Range{End: 8}}, - "bar": {"foo": index.Range{Start: 8, End: 16}, "baz": index.Range{Start: 16, End: 24}}, + "bar": {"foo": index.Range{Start: 8, End: 16}, "bar": index.Range{Start: 16, End: 24}, "baz": index.Range{Start: 24, End: 32}}, }, seriesMaxSize: 1000, seriesMatchRatio: 0.5, postingGroups: []*postingGroup{ - {addAll: true, name: "foo", removeKeys: []string{"bar"}}, - {addAll: true, name: "bar", removeKeys: []string{"baz", "foo"}}, + {name: "foo", addKeys: []string{"bar"}}, + {addAll: true, name: "bar", removeKeys: []string{"bar", "baz", "foo"}}, }, + postingGroupMaxKeySeriesRatio: 2, expectedPostingGroups: []*postingGroup{ - {addAll: true, name: "foo", removeKeys: []string{"bar"}, cardinality: 1, existentKeys: 1}, - {addAll: true, name: "bar", removeKeys: []string{"baz", "foo"}, cardinality: 2, existentKeys: 2}, + {name: "foo", addKeys: []string{"bar"}, cardinality: 1, existentKeys: 1}, + {addAll: true, name: "bar", removeKeys: []string{"bar", "baz", "foo"}, cardinality: 3, existentKeys: 3, lazy: true}, + }, + }, + { + name: "3 groups with add key and remove keys, posting group with remove keys marked as lazy due to exceeding postingGroupMaxKeySeriesRatio", + inputPostings: map[string]map[string]index.Range{ + "foo": {"bar": index.Range{End: 8}}, + "bar": {"foo": index.Range{Start: 8, End: 16}, "bar": index.Range{Start: 16, End: 24}, "baz": index.Range{Start: 24, End: 32}}, + "cluster": {"foo": index.Range{Start: 32, End: 136}}, + }, + seriesMaxSize: 1000, + seriesMatchRatio: 0.5, + postingGroups: []*postingGroup{ + {name: "foo", addKeys: []string{"bar"}}, + {addAll: true, name: "bar", removeKeys: []string{"bar", "baz", "foo"}}, + {name: "cluster", addKeys: []string{"foo"}}, + }, + postingGroupMaxKeySeriesRatio: 2, + expectedPostingGroups: []*postingGroup{ + {name: "foo", addKeys: []string{"bar"}, cardinality: 1, existentKeys: 1}, + {addAll: true, name: "bar", removeKeys: []string{"bar", "baz", "foo"}, cardinality: 3, existentKeys: 3, lazy: true}, + {name: "cluster", addKeys: []string{"foo"}, cardinality: 25, existentKeys: 1}, }, }, { - // This test case won't be optimized in real case because it is add all - // so doesn't make sense to optimize postings fetching anyway. - name: "two posting groups with remove keys, small postings and large series size", + name: "two posting groups with remove keys, postingGroupMaxKeySeriesRatio won't be applied", inputPostings: map[string]map[string]index.Range{ "foo": {"bar": index.Range{End: 8}}, - "bar": {"foo": index.Range{Start: 8, End: 16}}, + "bar": {"foo": index.Range{Start: 8, End: 16}, "baz": index.Range{Start: 16, End: 24}}, }, seriesMaxSize: 1000, seriesMatchRatio: 0.5, postingGroups: []*postingGroup{ {addAll: true, name: "foo", removeKeys: []string{"bar"}}, - {addAll: true, name: "bar", removeKeys: []string{"foo"}}, + {addAll: true, name: "bar", removeKeys: []string{"baz", "foo"}}, }, + postingGroupMaxKeySeriesRatio: 1, expectedPostingGroups: []*postingGroup{ - {addAll: true, name: "bar", removeKeys: []string{"foo"}, cardinality: 1, existentKeys: 1}, {addAll: true, name: "foo", removeKeys: []string{"bar"}, cardinality: 1, existentKeys: 1}, + {addAll: true, name: "bar", removeKeys: []string{"baz", "foo"}, cardinality: 2, existentKeys: 2}, }, }, {