Skip to content

Commit

Permalink
add flag to handle zscore
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Ye <[email protected]>
  • Loading branch information
yeya24 committed Jan 6, 2025
1 parent 9627fd6 commit 4cf5544
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 13 deletions.
1 change: 1 addition & 0 deletions cmd/thanos/downsample.go
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ func processDownsampling(
}
if stats.SeriesMaxSize > 0 {
meta.Thanos.IndexStats.SeriesMaxSize = stats.SeriesMaxSize
meta.Thanos.IndexStats.SeriesAvgSize = stats.SeriesAvgSize
meta.Thanos.IndexStats.SeriesP90Size = stats.SeriesP90Size
meta.Thanos.IndexStats.SeriesP99Size = stats.SeriesP99Size
meta.Thanos.IndexStats.SeriesP999Size = stats.SeriesP999Size
Expand Down
15 changes: 13 additions & 2 deletions cmd/thanos/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ type storeConfig struct {
chunkPoolSize units.Base2Bytes
estimatedMaxSeriesSize uint64
estimatedSeriesSizeStat string
estimatedSeriesSizeZScore float64
estimatedMaxChunkSize uint64
seriesBatchSize int
storeRateLimits store.SeriesSelectLimits
Expand Down Expand Up @@ -166,9 +167,12 @@ func (sc *storeConfig) registerFlag(cmd extkingpin.FlagClause) {
cmd.Flag("debug.estimated-max-series-size", "Estimated max series size. Setting a value might result in over fetching data while a small value might result in data refetch. Default value is 64KB.").
Hidden().Default(strconv.Itoa(store.EstimatedMaxSeriesSize)).Uint64Var(&sc.estimatedMaxSeriesSize)

cmd.Flag("estimated-series-size-stat", "Statistic to use to estimate block series size. This is currently used for lazy expanded posting series size estimation. Available options are max, p90, p99, p999 and p9999. Default value is "+string(store.BlockSeriesSizeMax)).
cmd.Flag("estimated-series-size-stat", "Statistic to use to estimate block series size. This is currently used for lazy expanded posting series size estimation. Available options are max, p90, p99, p999, p9999 and zscore. If zscore is picked, the actual zscore value is set via estimated-series-size-stat-zscore. Default value is "+string(store.BlockSeriesSizeMax)).
Default(string(store.BlockSeriesSizeMax)).
EnumVar(&sc.estimatedSeriesSizeStat, string(store.BlockSeriesSizeMax), string(store.BlockSeriesSizeP99), string(store.BlockSeriesSizeP999), string(store.BlockSeriesSizeP9999))
EnumVar(&sc.estimatedSeriesSizeStat, string(store.BlockSeriesSizeMax), string(store.BlockSeriesSizeP90), string(store.BlockSeriesSizeP99), string(store.BlockSeriesSizeP999), string(store.BlockSeriesSizeP9999), string(store.BlockSeriesSizeZScore))

cmd.Flag("estimated-series-size-stat-zscore", "Zscore is a statistical measurement that describes a value's relationship to the mean series size. Zscore 2 is calculated as average size + 2 * standard deviation. Use a larger zscore if you want a larger estimated series size. Default value is 2. Cannot be lower than 0.").
Default("2").Float64Var(&sc.estimatedSeriesSizeZScore)

cmd.Flag("debug.estimated-max-chunk-size", "Estimated max chunk size. Setting a value might result in over fetching data while a small value might result in data refetch. Default value is 16KiB.").
Hidden().Default(strconv.Itoa(store.EstimatedMaxChunkSize)).Uint64Var(&sc.estimatedMaxChunkSize)
Expand Down Expand Up @@ -408,6 +412,9 @@ func runStore(
}

estimatedSeriesSizeStat := strings.ToLower(conf.estimatedSeriesSizeStat)
if estimatedSeriesSizeStat == string(store.BlockSeriesSizeZScore) && conf.estimatedSeriesSizeZScore < 0 {
return errors.Errorf("estimated series size zscore cannot be lower than 0 (got %v)", conf.estimatedSeriesSizeZScore)
}

options := []store.BucketStoreOption{
store.WithLogger(logger),
Expand Down Expand Up @@ -454,6 +461,10 @@ func runStore(
if m.Thanos.IndexStats.SeriesP9999Size > 0 {
return uint64(m.Thanos.IndexStats.SeriesP9999Size)
}
case string(store.BlockSeriesSizeZScore):
if m.Thanos.IndexStats.SeriesSizeStdDev > 0 && m.Thanos.IndexStats.SeriesAvgSize > 0 {
return uint64(float64(m.Thanos.IndexStats.SeriesSizeStdDev)*conf.estimatedSeriesSizeZScore) + uint64(m.Thanos.IndexStats.SeriesAvgSize)
}
}

// Always fallback to series max size if none of other stats available.
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require (
github.com/cortexproject/promqlsmith v0.0.0-20240506042652-6cfdd9739a5e
github.com/cristalhq/hedgedhttp v0.9.1
github.com/dustin/go-humanize v1.0.1
github.com/DataDog/sketches-go v1.4.6
github.com/efficientgo/core v1.0.0-rc.3
github.com/efficientgo/e2e v0.14.1-0.20230710114240-c316eb95ae5b
github.com/efficientgo/tools/extkingpin v0.0.0-20220817170617-6c25e3b627dd
Expand Down Expand Up @@ -102,8 +103,6 @@ require (
gopkg.in/yaml.v3 v3.0.1
)

require github.com/DataDog/sketches-go v1.4.6

require (
cloud.google.com/go v0.115.1 // indirect
cloud.google.com/go/auth v0.9.3 // indirect
Expand Down
10 changes: 6 additions & 4 deletions pkg/block/block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func TestUpload(t *testing.T) {
testutil.Equals(t, 3, len(bkt.Objects()))
testutil.Equals(t, 3727, len(bkt.Objects()[path.Join(b1.String(), ChunksDirname, "000001")]))
testutil.Equals(t, 401, len(bkt.Objects()[path.Join(b1.String(), IndexFilename)]))
testutil.Equals(t, 702, len(bkt.Objects()[path.Join(b1.String(), MetaFilename)]))
testutil.Equals(t, 756, len(bkt.Objects()[path.Join(b1.String(), MetaFilename)]))

// File stats are gathered.
testutil.Equals(t, fmt.Sprintf(`{
Expand Down Expand Up @@ -185,10 +185,12 @@ func TestUpload(t *testing.T) {
],
"index_stats": {
"series_max_size": 16,
"series_avg_size": 16,
"series_p90_size": 16,
"series_p99_size": 16,
"series_p999_size": 16,
"series_p9999_size": 16
"series_p9999_size": 16,
"series_size_stddev": 1
}
}
}
Expand All @@ -200,7 +202,7 @@ func TestUpload(t *testing.T) {
testutil.Equals(t, 3, len(bkt.Objects()))
testutil.Equals(t, 3727, len(bkt.Objects()[path.Join(b1.String(), ChunksDirname, "000001")]))
testutil.Equals(t, 401, len(bkt.Objects()[path.Join(b1.String(), IndexFilename)]))
testutil.Equals(t, 702, len(bkt.Objects()[path.Join(b1.String(), MetaFilename)]))
testutil.Equals(t, 756, len(bkt.Objects()[path.Join(b1.String(), MetaFilename)]))
}
{
// Upload with no external labels should be blocked.
Expand Down Expand Up @@ -232,7 +234,7 @@ func TestUpload(t *testing.T) {
testutil.Equals(t, 6, len(bkt.Objects()))
testutil.Equals(t, 3727, len(bkt.Objects()[path.Join(b2.String(), ChunksDirname, "000001")]))
testutil.Equals(t, 401, len(bkt.Objects()[path.Join(b2.String(), IndexFilename)]))
testutil.Equals(t, 681, len(bkt.Objects()[path.Join(b2.String(), MetaFilename)]))
testutil.Equals(t, 735, len(bkt.Objects()[path.Join(b2.String(), MetaFilename)]))
}
}

Expand Down
1 change: 1 addition & 0 deletions pkg/block/metadata/meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ type Thanos struct {

type IndexStats struct {
SeriesMaxSize int64 `json:"series_max_size,omitempty"`
SeriesAvgSize int64 `json:"series_avg_size,omitempty"`
SeriesP90Size int64 `json:"series_p90_size,omitempty"`
SeriesP99Size int64 `json:"series_p99_size,omitempty"`
SeriesP999Size int64 `json:"series_p999_size,omitempty"`
Expand Down
1 change: 1 addition & 0 deletions pkg/compact/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,7 @@ func (cg *Group) compact(ctx context.Context, dir string, planner Planner, comp
}
if stats.SeriesMaxSize > 0 {
thanosMeta.IndexStats.SeriesMaxSize = stats.SeriesMaxSize
thanosMeta.IndexStats.SeriesAvgSize = stats.SeriesAvgSize
thanosMeta.IndexStats.SeriesP90Size = stats.SeriesP90Size
thanosMeta.IndexStats.SeriesP99Size = stats.SeriesP99Size
thanosMeta.IndexStats.SeriesP999Size = stats.SeriesP999Size
Expand Down
11 changes: 6 additions & 5 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,12 @@ const (
type BlockSeriesSizeStat string

const (
BlockSeriesSizeMax BlockSeriesSizeStat = "max"
BlockSeriesSizeP90 BlockSeriesSizeStat = "p90"
BlockSeriesSizeP99 BlockSeriesSizeStat = "p99"
BlockSeriesSizeP999 BlockSeriesSizeStat = "p999"
BlockSeriesSizeP9999 BlockSeriesSizeStat = "p9999"
BlockSeriesSizeMax BlockSeriesSizeStat = "max"
BlockSeriesSizeP90 BlockSeriesSizeStat = "p90"
BlockSeriesSizeP99 BlockSeriesSizeStat = "p99"
BlockSeriesSizeP999 BlockSeriesSizeStat = "p999"
BlockSeriesSizeP9999 BlockSeriesSizeStat = "p9999"
BlockSeriesSizeZScore BlockSeriesSizeStat = "zscore"
)

const (
Expand Down
3 changes: 3 additions & 0 deletions pkg/testutil/e2eutil/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,10 +654,13 @@ func createBlock(
// For simplicity, use series size for all series size fields.
IndexStats: metadata.IndexStats{
SeriesMaxSize: seriesSize,
SeriesAvgSize: seriesSize,
SeriesP90Size: seriesSize,
SeriesP99Size: seriesSize,
SeriesP999Size: seriesSize,
SeriesP9999Size: seriesSize,
// Hardcode stddev here.
SeriesSizeStdDev: 1,
},
}, nil); err != nil {
return id, errors.Wrap(err, "finalize block")
Expand Down

0 comments on commit 4cf5544

Please sign in to comment.