Skip to content

Commit

Permalink
feat: add more analysis on digests
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoduncan committed Jan 26, 2025
1 parent 74ada72 commit 0935c42
Show file tree
Hide file tree
Showing 12 changed files with 489 additions and 115 deletions.
118 changes: 36 additions & 82 deletions bases/criterium/src/criterium/analyse.clj
Original file line number Diff line number Diff line change
Expand Up @@ -106,64 +106,19 @@
(metric/filter-metrics
(metric/type-pred :quantitative)))
metric-configs (metric/all-metric-configs metrics-defs)
quantiles (sampled-stats/quantiles
(util/metric->values metrics-samples)
quantiles (methods/quantiles
metrics-samples
metric-configs
analysis)
quantiles-map (have
types/quantiles-map?
{:type :criterium/quantiles
:source-id samples-id
:quantiles quantiles
:metrics-defs metrics-defs
:transform collect-plan/identity-transforms})]
(merge
{:type :criterium/quantiles
:source-id samples-id
:metrics-defs metrics-defs}
quantiles))]
(assoc data-map id quantiles-map)))))

(defn outlier-count
  "Build an outlier tally map from four per-category counts.

  The arguments are, in order, the counts for the `:low-severe`,
  `:low-mild`, `:high-mild` and `:high-severe` categories.  Each count is
  stored under the keyword of the same name."
  [low-severe low-mild high-mild high-severe]
  (array-map
   :low-severe  low-severe
   :low-mild    low-mild
   :high-mild   high-mild
   :high-severe high-severe))

(defn classifier
  "Return a function that classifies a value against outlier thresholds.

  Takes a single sequential argument holding the four thresholds
  `[low-severe low-mild high-mild high-severe]`.

  The returned function takes a value `x` and an index `i`.  It returns nil
  when `x` lies in the closed interval `[low-mild, high-mild]`; otherwise it
  returns the pair `[i category]`, where category is one of `:low-severe`,
  `:low-mild`, `:high-mild` or `:high-severe`.  Values equal to a severe
  threshold are classified as severe."
  [[^double low-severe ^double low-mild ^double high-mild ^double high-severe]]
  (fn [^double x i]
    (if (<= low-mild x high-mild)
      ;; inside the mild fences: not an outlier
      nil
      (let [category (cond
                       (>= low-severe x)           :low-severe
                       (< low-severe x low-mild)   :low-mild
                       (< high-mild x high-severe) :high-mild
                       (<= high-severe x)          :high-severe)]
        [i category]))))

(defn samples-outliers
  "Compute outlier information for each metric in `metric-configs`.

  For every metric config, the quantiles found at its `:path` in
  `all-quantiles` (which must be a map) supply the 0.25 and 0.75 quantiles
  that are handed to `stats/boxplot-outlier-thresholds`.  Each value found
  at the same path in `samples` is then classified against those
  thresholds, paired with its position in the sequence.

  Returns a nested map which, at each metric path, holds:
    :thresholds     - the thresholds used,
    :outliers       - map of sample index to outlier category, or nil when
                      all thresholds are equal,
    :outlier-counts - number of outliers per category."
  [metric-configs all-quantiles samples]
  (let [zero-counts (outlier-count 0 0 0 0)
        tally       (fn [counts category]
                      (update counts category inc))]
    (reduce
     (fn sample-m [result metric-config]
       (let [path       (:path metric-config)
             qs         (have map? (get-in all-quantiles path)
                              {:all-quantiles all-quantiles})
             thresholds (stats/boxplot-outlier-thresholds
                         (get qs 0.25)
                         (get qs 0.75))
             classify   (classifier thresholds)
             ;; identical thresholds give no usable fences, so skip
             ;; classification entirely
             found      (when (apply not= thresholds)
                          (into {}
                                (map classify
                                     (get samples path)
                                     (range))))
             counts     (reduce tally zero-counts (vals found))]
         (update-in result path
                    assoc
                    :thresholds thresholds
                    :outliers found
                    :outlier-counts counts)))
     {}
     metric-configs)))

#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(defn outliers
"Detects statistical outliers in sample measurements using boxplot criteria.
Expand Down Expand Up @@ -214,19 +169,19 @@
"outlier analysis requires quantiles analysis"
{:quantiles-id quantiles-id
:available-ids (keys data-map)})))
(let [outliers (samples-outliers
(let [outliers (methods/outliers
metrics-samples
all-quantiles
metric-configs
(util/quantiles all-quantiles)
(util/metric->values metrics-samples))
{})
outliers-map (have
types/outliers-map?
{:type :criterium/outliers
:source-id samples-id
:transform collect-plan/identity-transforms
:quantiles-id quantiles-id
:metrics-defs metrics-defs
:outliers outliers
:num-samples (:num-samples metrics-samples)})]
(merge
{:type :criterium/outliers
:source-id samples-id
:quantiles-id quantiles-id
:metrics-defs metrics-defs}
outliers))]
(assoc data-map id outliers-map))))))

#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
Expand Down Expand Up @@ -273,21 +228,20 @@
(metric/filter-metrics
(metric/type-pred :quantitative)))
metric-configs (metric/all-metric-configs metrics-defs)
stats (sampled-stats/sample-stats
data-map
samples-id
(when outliers (util/outliers outliers))
stats (methods/stats
metrics-samples
outliers
metric-configs
analysis)
stats-map (have
types/stats-map?
{:type :criterium/stats
:metrics-defs metrics-defs
:source-id samples-id
:outliers-id outliers-id
:stats stats
:transform collect-plan/identity-transforms
:batch-size (:batch-size metrics-samples)})]
(merge
{:type :criterium/stats
:metrics-defs metrics-defs
:source-id samples-id
:outliers-id outliers-id
:batch-size (:batch-size metrics-samples)}
stats))]
(assoc data-map id stats-map))))))

#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
Expand Down Expand Up @@ -318,7 +272,7 @@
(:event-stats result))
;; Returns {:compilation {:time-ms 8 :sample-count 2} ...}"
([] (event-stats {}))
([{:keys [id samples-id metric-ids] :as _analysis}]
([{:keys [id samples-id metric-ids] :as analysis}]
(let [id (or id :event-stats)
samples-id (or samples-id :samples)]
(fn [data-map]
Expand All @@ -328,17 +282,17 @@
(metric/select-metrics metric-ids)
(metric/filter-metrics
(metric/type-pred :event)))
event-stats (sampled-stats/event-stats
event-stats (methods/event-stats
metrics-samples
metrics-defs
(util/metric->values metrics-samples))
analysis)
es-map (have
types/event-stats-map?
{:type :criterium/event-stats
:transform collect-plan/identity-transforms
:source-id samples-id
:metrics-defs metrics-defs
:event-stats event-stats
:batch-size (:batch-size metrics-samples)})]
(merge
{:type :criterium/event-stats
:source-id samples-id
:metrics-defs metrics-defs}
event-stats))]
(assoc data-map id es-map))))))

(defn- min-f
Expand Down
117 changes: 115 additions & 2 deletions bases/criterium/src/criterium/analyse/digest_samples.clj
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
(ns criterium.analyse.digest-samples
(:require
[criterium.analyse.methods :as methods]
[criterium.collect-plan :as collect-plan]
[criterium.types :as types]
[criterium.util.helpers :as util]
[criterium.util.invariant :refer [have?]]
[criterium.util.invariant :refer [have have?]]
[criterium.util.stats :as stats]
[criterium.util.t-digest :as t-digest]))

(defmethod methods/transform :criterium/digest
Expand All @@ -15,7 +17,9 @@
(assoc
result
path
(t-digest/transform (metric->digest path) f)))
(t-digest/transform
(metric->digest path)
f)))
{}
(mapv :path metric-configs))]
(->
Expand All @@ -25,3 +29,112 @@
(merge
{:metric->digest metric->digest'
:transform {:sample-> inv-f :->sample f}}))))

;; Quantiles for digest-backed samples.
;;
;; For each metric path in `metric-configs`, looks up that path's digest and
;; evaluates `t-digest/quantile` at 0.25, 0.5 and 0.75 plus any extra
;; probabilities supplied under `:quantiles` in `options`.
;;
;; Returns a map with :type :criterium/quantiles, the nested
;; path -> {probability value} map under :quantiles, and the identity
;; :transform.
(defmethod methods/quantiles :criterium/digest
  [digest-samples metric-configs options]
  ;; This must be a :pre condition.  `fn` consumes a leading map as its
  ;; condition map and only honours :pre/:post, so a check placed under any
  ;; other key is discarded without ever being evaluated.
  {:pre [(have? types/digest-samples-map? digest-samples)]}
  (let [metric->digest (util/metric->digest digest-samples)
        ;; the quartiles are always included; outlier detection reads
        ;; 0.25 and 0.75 from the result
        probabilities  (into [0.25 0.5 0.75] (:quantiles options))
        quantiles      (reduce
                        (fn quantiles-for-path [result path]
                          (let [digest (metric->digest path)]
                            (assoc-in
                             result path
                             (zipmap
                              probabilities
                              (mapv
                               (partial t-digest/quantile digest)
                               probabilities)))))
                        {}
                        (mapv :path metric-configs))]
    {:type      :criterium/quantiles
     :quantiles quantiles
     :transform collect-plan/identity-transforms}))

(defn outlier-count
  "Return a map of outlier counts keyed by category.

  Positional arguments supply the counts for `:low-severe`, `:low-mild`,
  `:high-mild` and `:high-severe`, in that order."
  [low-severe low-mild high-mild high-severe]
  (array-map
   :low-severe  low-severe
   :low-mild    low-mild
   :high-mild   high-mild
   :high-severe high-severe))

(defn classifier
  "Build a classification function from four outlier thresholds.

  The single argument is a sequential collection
  `[low-severe low-mild high-mild high-severe]`.

  The result is a function of a value `x` and an index `i`.  It yields nil
  for values within `[low-mild, high-mild]` (bounds included), and
  `[i category]` otherwise, with category being `:low-severe`, `:low-mild`,
  `:high-mild` or `:high-severe`.  A value exactly on a severe threshold
  counts as severe."
  [[^double low-severe ^double low-mild ^double high-mild ^double high-severe]]
  (fn [^double x i]
    (if (<= low-mild x high-mild)
      ;; within the mild fences: nothing to report
      nil
      (let [category (cond
                       (>= low-severe x)           :low-severe
                       (< low-severe x low-mild)   :low-mild
                       (< high-mild x high-severe) :high-mild
                       (<= high-severe x)          :high-severe)]
        [i category]))))

(defn digest-outliers
  "Classify the centroid means of `digest` against boxplot thresholds.

  `quantiles` is a map from probability to value; its 0.25 and 0.75 entries
  are passed to `stats/boxplot-outlier-thresholds`.  Each value returned by
  `t-digest/centroid-means` is classified together with its position in
  that sequence.

  Returns a map with:
    :thresholds     - the thresholds used,
    :outliers       - map of centroid index to outlier category, or nil
                      when all thresholds are equal,
    :outlier-counts - number of outlying centroids per category.

  NOTE(review): indices and counts refer to centroids, not to individual
  samples; a centroid presumably summarises several samples, so these
  counts may under-report -- confirm against the t-digest implementation."
  [digest quantiles]
  (let [thresholds (stats/boxplot-outlier-thresholds
                    (get quantiles 0.25)
                    (get quantiles 0.75))
        classify   (classifier thresholds)
        ;; identical thresholds give no usable fences, so skip
        ;; classification entirely
        found      (when (apply not= thresholds)
                     (into {}
                           (map classify
                                (t-digest/centroid-means digest)
                                (range))))
        counts     (reduce
                    (fn [acc category]
                      (update acc category inc))
                    (outlier-count 0 0 0 0)
                    (vals found))]
    {:thresholds     thresholds
     :outliers       found
     :outlier-counts counts}))

;; Outlier detection for digest-backed samples.
;;
;; For each metric path in `metric-configs`, runs `digest-outliers` on that
;; path's digest using the quantiles found at the same path in
;; `all-quantiles`.
;;
;; Returns a map with :type :criterium/outliers, the nested per-path results
;; under :outliers, :num-samples and the identity :transform.
(defmethod methods/outliers :criterium/digest
  [digest-samples all-quantiles metric-configs _options]
  ;; This must be a :pre condition.  `fn` consumes a leading map as its
  ;; condition map and only honours :pre/:post, so a check placed under any
  ;; other key is discarded without ever being evaluated.
  {:pre [(have? types/digest-samples-map? digest-samples)]}
  (let [metric->digest (util/metric->digest digest-samples)
        quantiles      (util/quantiles all-quantiles)
        outliers       (reduce
                        (fn outliers-for-path [result path]
                          (let [digest         (metric->digest path)
                                path-quantiles (get-in quantiles path)]
                            (assoc-in
                             result path
                             (digest-outliers digest path-quantiles))))
                        {}
                        (mapv :path metric-configs))]
    {:type        :criterium/outliers
     :outliers    outliers
     ;; Taken from the first digest only.
     ;; NOTE(review): presumably every metric's digest holds the same number
     ;; of samples -- confirm.
     :num-samples (t-digest/sample-count (first (vals metric->digest)))
     :transform   collect-plan/identity-transforms}))

(defn- digest-sample-states
  "Summary statistics for a single digest.

  Returns a map with the sample count (:n), :mean, :variance, :sigma (the
  square root of the variance), the mean plus and minus three sigma
  (:mean-plus-3sigma, :mean-minus-3sigma), and the minimum and maximum
  values (:min-val, :max-val), all obtained from `digest`.

  The `outliers` argument is accepted but not currently used."
  [digest outliers]
  (let [mu       (t-digest/mean digest)
        sigma-sq (t-digest/variance digest mu)
        sd       (Math/sqrt sigma-sq)
        spread   (* 3.0 sd)
        upper    (+ mu spread)
        lower    (- mu spread)]
    {:n                 (t-digest/sample-count digest)
     :mean              mu
     :variance          sigma-sq
     :sigma             sd
     :mean-plus-3sigma  upper
     :mean-minus-3sigma lower
     :min-val           (t-digest/minimum digest)
     :max-val           (t-digest/maximum digest)}))

;; Summary statistics for digest-backed samples.
;;
;; For each metric path in `metric-configs`, computes
;; `digest-sample-states` from that path's digest; a digest must exist for
;; every requested path.
;;
;; Returns a map with :type :criterium/stats, the nested per-path statistics
;; under :stats, and the identity :transform.
(defmethod methods/stats :criterium/digest
  [digest-samples outliers metric-configs _options]
  ;; This must be a :pre condition.  `fn` consumes a leading map as its
  ;; condition map and only honours :pre/:post, so a check placed under any
  ;; other key is discarded without ever being evaluated.
  {:pre [(have? types/digest-samples-map? digest-samples)]}
  (let [metric->digest (util/metric->digest digest-samples)
        ;; outliers are optional; extract them only when an outlier
        ;; analysis was supplied
        outliers       (when outliers (util/outliers outliers))
        stats          (reduce
                        (fn stats-for-path [result path]
                          (let [digest (have (metric->digest path))]
                            (assoc-in
                             result path
                             (digest-sample-states digest outliers))))
                        {}
                        (mapv :path metric-configs))]
    {:type      :criterium/stats
     :stats     stats
     :transform collect-plan/identity-transforms}))
20 changes: 20 additions & 0 deletions bases/criterium/src/criterium/analyse/methods.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,23 @@
"Transform sample values."
(fn [sample-map metric-configs f inv-f options]
(:type sample-map)))

(defmulti quantiles
  "Calculate quantiles.

  Called as `(quantiles sample-map metric-configs options)`.  Dispatches on
  the `:type` of `sample-map`; the remaining arguments play no part in
  dispatch and are passed to the selected method."
  (fn [sample-map _metric-configs _options]
    (get sample-map :type)))

(defmulti outliers
  "Calculate outliers.

  Called as `(outliers sample-map quantiles metric-configs options)`.
  Dispatches on the `:type` of `sample-map`; the remaining arguments play no
  part in dispatch and are passed to the selected method."
  (fn [sample-map _quantiles _metric-configs _options]
    (get sample-map :type)))

(defmulti stats
  "Calculate sample statistics.

  Called as `(stats sample-map outliers metric-configs options)`.
  Dispatches on the `:type` of `sample-map`; the remaining arguments play no
  part in dispatch and are passed to the selected method."
  (fn [sample-map _outliers _metric-configs _options]
    (get sample-map :type)))

(defmulti event-stats
  "Calculate sample statistics for events.

  Called as `(event-stats sample-map metric-configs options)`.  Dispatches
  on the `:type` of `sample-map`; the remaining arguments play no part in
  dispatch and are passed to the selected method."
  (fn [sample-map _metric-configs _options]
    (get sample-map :type)))
Loading

0 comments on commit 0935c42

Please sign in to comment.