diff --git a/.clj-kondo/config.edn b/.clj-kondo/config.edn new file mode 100644 index 0000000..cfc2dcd --- /dev/null +++ b/.clj-kondo/config.edn @@ -0,0 +1,12 @@ +{:config-paths + ["../bases/criterium.arg-gen/resources/clj-kondo.exports/criterium/arg-gen/"] + :hooks + {:analyze-call + {criterium.view/def-multi-view hooks.impl/def-multi-view}} + :lint-as {clojure.test.check.clojure-test/defspec clojure.core/def + clojure.test.check.properties/for-all clojure.core/let + criterium.util/optional-require clojure.core/require} + :linters {:unused-bindings {:exclude-defmulti-args true} + :used-underscored-binding {:level :warning} + :unsorted-required-namespaces {:level :warning}} + :output {:exclude-files ["src/data_readers.clj"]}} diff --git a/.clj-kondo/hooks/impl.clj b/.clj-kondo/hooks/impl.clj new file mode 100644 index 0000000..5439eb7 --- /dev/null +++ b/.clj-kondo/hooks/impl.clj @@ -0,0 +1,33 @@ +(ns hooks.impl + "Hooks for criterium implementation linting" + (:require + [clj-kondo.hooks-api :as api])) + +(defn def-multi-view + "Rewrite def-multi-view to define the function and multi-method." + [{:keys [node]}] + (let [[_ n] (:children node) + mm-n (api/token-node (symbol (str (:string-value n) "*"))) + new-node (api/list-node + (list + (api/token-node 'do) + (api/list-node + (list + (api/token-node 'defmulti) + mm-n + (api/token-node 'identity))) + (api/list-node + (list + (api/token-node 'defn) n + (api/list-node + (list (api/vector-node []))) + (api/list-node + (list + (api/vector-node [(api/token-node 'arg)]) + (api/token-node 'arg))))) + ;; use the multi-view + (api/list-node + (list n))))] + ;; un-comment below to debug changes + ;; (prn :def-multi-view (api/sexpr new-node)) + {:node (with-meta new-node (meta node))})) diff --git a/.cljfmt.edn b/.cljfmt.edn new file mode 100644 index 0000000..38f8570 --- /dev/null +++ b/.cljfmt.edn @@ -0,0 +1,8 @@ +{:remove-surrounding-whitespace? true + :remove-trailing-whitespace? true + :remove-consecutive-blank-lines? true + :insert-missing-whitespace? true + :align-associative? true + :indents {defrecord [[:inner 0 2]] + as-> [[:inner 0]] + cond-> [[:inner 0]]}} diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..a0b33bf --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,11 @@ +((clojure-mode + (clojure-indent-style . 'always-align) + (clojure-special-arg-indent-factor . 1) ; for cljfmt equivalence + (cider-preferred-build-tool . "clojure-cli") + (cider-clojure-cli-aliases . "dev:test") + (eval . + (define-clojure-indent + ;; Please keep this list sorted + (arg-gen/measured 2))) + (fill-column . 80) + (comment-fill-column .
72))) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..4034d71 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,89 @@ +# A workflow run is made up of one or more jobs that can run +# sequentially or in parallel +on: [push, pull_request] +name: Run tests + +jobs: + tests: + name: Tests + + strategy: + matrix: + # TODO add windows-latest + os: [ubuntu-latest, macOS-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Prepare java + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '11' + + # - uses: joschi/setup-jdk@v2 + # with: + # java-version: '11' # The OpenJDK version to make available on the path + # architecture: 'x64' # defaults to 'x64' + + - name: Install clojure tools + uses: DeLaGuardo/setup-clojure@10.1 + with: + cli: 1.11.1.1208 + + - name: Install node + uses: actions/setup-node@v3 + with: + node-version: '16' + + - name: Install vega-cli + run: | + npm install -g vega vega-lite vega-cli + + - name: Install babashka + uses: turtlequeue/setup-babashka@v1.5.0 + with: + babashka-version: 0.7.0 + + - name: Install makejack + run: | + curl -O https://raw.githubusercontent.com/hugoduncan/makejack/6966696979c9b9c0c97e54f82a5d4096e4105240/mj + chmod 755 mj + mv mj /usr/local/bin + + - name: Setup clojure-lsp + uses: clojure-lsp/setup-clojure-lsp@v1 + with: + clojure-lsp-version: 2022.12.09-15.51.10 + + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 # all commits for git-rev-count + + - name: poly check + run: | + set -x + clojure -M:poly check + + - name: build and install jar + run: | + set -x + ( + cd projects/criterium + clojure -T:build build :verbose true + ) + + - name: poly test :all + run: | + set -x + mj compile-agent-cpp + mj javac-agent + clojure -M:poly test :all + + - name: lint + run: | + set -x + clojure-lsp diagnostics + clojure-lsp clean-ns --dry + clojure-lsp format --dry diff --git a/.gitignore b/.gitignore index 188e9ff..9cc2abe 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,17 @@ classes autodoc/** doc/** java.hprof.* +.cpcache .lein-deps-sum .lein-failures .ritz-exception-filters target checkouts +pom.* +.lsp +version.edn +*.dylib +*.log +*.dSYM +.clj-kondo/*/ +.aider* diff --git a/.lsp/config.edn b/.lsp/config.edn new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/.lsp/config.edn @@ -0,0 +1 @@ +{} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..33617ac --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +.PHONY: test test-watch docs eastwood cljfmt cloverage release deploy clean + +VERSION ?= 0.4.6-SNAPSHOT + +TEST_PROFILES := :test + +FULL_PROFILES := :test.check:clj-xchart + + +kondo: + clj-kondo --lint src + +cljfmt: + lein with-profile +$(VERSION),+cljfmt cljfmt check + +test: + #clojure -A:kaocha$(FULL_PROFILES) + clojure -Srepro -M:poly test :all + +api-doc: + clj -A:codox$(FULL_PROFILES) + +docs: doc-src/publish.el + @echo "Publishing..." + emacsclient --eval "(load-file \"doc-src/publish.el\")(org-publish-all)" diff --git a/README.ALPHA.md b/README.ALPHA.md new file mode 100644 index 0000000..04a409e --- /dev/null +++ b/README.ALPHA.md @@ -0,0 +1,107 @@ +# Criterium 0.5.x + +The goal is to make criterium more data driven, and usable in more +contexts. + +## Status + +Early alpha. Breaking changes will be made. + +## Quick Start + +For the moment there will be no jar release. Please use as a git +dependency, taking a sha from the master branch. 
+ +``` +{:deps {io.github.hugoduncan/criterium + {:git/sha "xxxx" + :deps/root "projects/criterium"}}} +``` + +To time an expression use `criterium.bench/bench`. + +```clojure +(require '[criterium.bench :as bench]) +(bench/bench (criterium.jvm/wait 10000)) ; 10us busy wait +``` + +If you are using portal, try: + +```clojure +(require 'criterium.viewer.portal) +(bench/bench (criterium.jvm/wait 10000) + :viewer :portal + :benchmark criterium.benchmarks/log-histogram) +``` + +## New features + +- improved accuracy on very fast functions. +- charting via portal +- thread memory allocation collection +- agent to allow capture of all allocations during the execution of an + expression. + +## Design + +The `bench` macro is a thin layer over the following. + +Criterium separates metrics collection from the processing of the metrics +to analyse and display them. + +We provide multiple ways to collect metrics: running a collection plan, +instrumenting a function, or triggering collection on events. You can +also provide metrics by any other means you like. + +Benchmarks are pipelines that can be run with different reporting +targets. + +### Metrics Collection + +Controlled sampling is based on the concept of a "measured". This +consists of a pair of functions: a state constructor, used to capture +arguments, and a measure function that takes the output of the state +constructor together with an evaluation count, runs the benchmark +subject the given number of times, and returns the total time +taken. + +This design addresses both time resolution and argument capture +concerns. + +Other ways of sampling are provided. You can instrument a function to +collect samples, or use a trigger, which collects samples based on +deltas between successive firings of the trigger. + +### Benchmark + +Once samples have been collected, they are passed to the benchmark, which +provides analysis and viewing. + +#### Analysis + +The analysis is based on a set of analysis functions that read and write +paths in a data map. Each analysis reads and writes default paths in +the map, but these can be explicitly specified. + +#### View + +Viewing is based on a set of viewing functions that read paths from +the data map. Each view uses default paths, but these can be +explicitly specified. + +The built-in views support different viewers. The default viewer, +`:print`, prints results in human-readable format. There are also +`:pprint` and `:portal` viewers. + +The `:portal` viewer is capable of displaying charts. + +## Development Plan + +Features that will probably get added: + +- add KDE to improve estimation of the PDF. +- add bi-modal detection, probably based on KDE. +- add a percentile sampling mode based on HDRHistogram +- add charting and regressions around change in metrics with a varied parameter +- add charting to compare alternate implementations of expressions +- add fitting of different distributions (e.g. hyperbolic secant) diff --git a/README.md b/README.md index 0fdf4c3..c6540d3 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,18 @@ # Criterium + +## Alpha version + +Master is now on 0.5.x, which should be considered ALPHA, subject to +breaking changes as new features are added. + +See the [ALPHA Readme](README.ALPHA.md). + +If you try it, please give feedback, including any breaking changes relative to 0.4.5. + + +## Stable version 0.4.5 + Criterium measures the computation time of an expression. It is designed to address some of the pitfalls of benchmarking, and benchmarking on the JVM in particular.
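The "measured" abstraction described under Metrics Collection in README.ALPHA.md can be pictured as a plain pair of functions. The sketch below is illustrative only; it is not the criterium API, and the `:state-fn`/`:measure-fn` names are hypothetical. It shows how a state constructor and a measure function combine to time a subject for a given evaluation count.

```clojure
;; Illustrative sketch of the "measured" concept; not the criterium API.
;; The :state-fn and :measure-fn names are hypothetical.
(def vec-measured
  {:state-fn   (fn [] (doall (range 100)))          ; capture the argument once
   :measure-fn (fn [xs ^long eval-count]            ; run the subject eval-count times
                 (let [start (System/nanoTime)]
                   (dotimes [_ eval-count]
                     (vec xs))
                   (- (System/nanoTime) start)))})  ; total elapsed nanoseconds

;; 1000 evaluations, returning the total time taken in nanoseconds:
((:measure-fn vec-measured) ((:state-fn vec-measured)) 1000)
```

Batching `eval-count` evaluations inside a single timed call is what gives the time resolution mentioned above, and capturing arguments in the state constructor keeps their construction out of the measurement.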
@@ -103,6 +116,12 @@ Serial correlation detection. Multimodal distribution detection. Use kernel density estimators? +## Non-Goals + +Expressions are evaluated inside a function call. We deliberately do +not seek ultimate accuracy for very quick expressions where the +function overhead may be significant. + ## Releasing To release, run the `release.sh` script. This requires that you have diff --git a/agent-cpp/Makefile b/agent-cpp/Makefile new file mode 100644 index 0000000..a9b039a --- /dev/null +++ b/agent-cpp/Makefile @@ -0,0 +1,26 @@ +JAVA_HOME ?= /Library/Java/JavaVirtualMachines/jdk-11.0.12.jdk/Contents/Home +JDK ?= $(JAVA_HOME) + +OSNAME ?= darwin + +LIBNAME = criterium + +# Linux GNU C Compiler +ifeq ($(OSNAME), linux) + CC=gcc + PLATFORM_CFLAGS=-std=c++17 -O2 -fPIC -shared + CFLAGS += -I$(JDK)/include -I$(JDK)/include/$(OSNAME) + LIBRARY=lib$(LIBNAME).so + LDFLAGS= +endif + +ifeq ($(OSNAME), darwin) + CC=clang + PLATFORM_CFLAGS=-std=c++17 -g -shared + CFLAGS += -I$(JDK)/include -I$(JDK)/include/$(OSNAME) + LIBRARY=lib$(LIBNAME).dylib + LDFLAGS=-lstdc++ +endif + +all: + $(CC) $(PLATFORM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o$(LIBRARY) agent.cpp diff --git a/agent-cpp/agent.cpp b/agent-cpp/agent.cpp new file mode 100644 index 0000000..3c16549 --- /dev/null +++ b/agent-cpp/agent.cpp @@ -0,0 +1,905 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// global ref to the JVMTI environment +static jvmtiEnv* jvmti = NULL; +static bool vm_dead = false; + +class raw_monitor { + jrawMonitorID id; +public: + raw_monitor(jrawMonitorID id) : id(id) { + jvmti->RawMonitorEnter(id); + } + ~raw_monitor() { + jvmti->RawMonitorExit(id); + } +}; + +template +class allocated { + T _ptr; +public: + allocated() noexcept : _ptr(0) {} + allocated(allocated&& l) noexcept + : _ptr(std::exchange(l._ptr, static_cast(0))) + {} + ~allocated() { jvmti->Deallocate((unsigned char*)_ptr);} + operator T& () { return _ptr; } + T* operator & () { return &_ptr; } +}; + +template +class local_ref { + T _ref; + JNIEnv* _env; +public: + local_ref(JNIEnv* env, T ref) : _ref(std::move(ref)), _env(env) {} + local_ref(JNIEnv* env) : _ref(NULL), _env(env) {} + local_ref(local_ref&& l) + : _ref(std::exchange(l._ref, NULL)), + _env(std::exchange(l._env, NULL)) + {} + ~local_ref() { + if (_ref != NULL && !vm_dead) { + _env->DeleteLocalRef(_ref); + _ref = NULL; + } + } + auto operator = (T ref) { _ref = ref; return *this; } + T* operator & () { return &_ref; } + operator T& () { return _ref; } +}; + +template +class global_ref { + T _ref; + JNIEnv* _env; +public: + global_ref(JNIEnv* env, T ref) + : _env(env) { + _ref = static_cast(env->NewGlobalRef(ref)); + } + /* global_ref(local_ref&& l) */ + /* : _ref(std::exchange(l._ref, NULL)), */ + /* _env(std::exchange(l._env, NULL)) */ + /* {} */ + // global_ref(JNIEnv* env) : _ref(0), _env(env) {} + ~global_ref() { + if (_ref != NULL && !vm_dead) { + _env->DeleteGlobalRef(_ref); + _ref = NULL; + } + } + // global_ref& operator = (T& ref) { + // _ref = _env->NewGlobalRef(ref); + // return *this; + // } + T* operator & () { return &_ref; } + operator T& () { return _ref; } +}; + +template local_ref mk_local_ref(JNIEnv* env, T t) { + return local_ref(env, t); +} + +static jrawMonitorID sample_lock; +static jrawMonitorID tag_lock; + +static jmethodID thread_getId_method = NULL; + +static std::unique_ptr> agent_class; +static std::unique_ptr> agent_allocation_start_marker_class; +static std::unique_ptr> 
agent_allocation_finish_marker_class; +static std::unique_ptr> agent_allocation_class; + +static jmethodID agent_allocation_ctor = NULL; +static char const* agent_allocation_class_args = + "(Ljava/lang/String;JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;JJJ)V"; + +static jmethodID agent_data1_method = NULL; +static jmethodID agent_data8_method = NULL; +static jfieldID agent_state_field = NULL; + +enum States : jlong { + passive, + allocation_tracing_starting = 10, + allocation_tracing_active = 11, + allocation_tracing_stopping = 15, + allocation_tracing_flushing = 16, + allocation_tracing_flushed = 17 +}; + +static jlong agent_state; + +void set_state(jlong state) { + agent_state = state; + // printf("In state %ld\n", state); +} + +void set_state(JNIEnv* env, jlong state) { + env->SetStaticLongField(*agent_class, agent_state_field, state); + set_state(state); +} + +static char const* allocation_start_marker = + "Lcriterium/agent/Agent$AllocationStartMarker;"; + +static char const* allocation_finish_marker = + "Lcriterium/agent/Agent$AllocationFinishMarker;"; + +static char const* allocation_class_name = + "Lcriterium/agent/Allocation;"; + +// static jobject the_sampler = NULL; +// static jobject the_sample_fn = NULL; +// static jmethodID the_sample_fn_invoke = NULL; +// static jfieldID the_sample_handshake = NULL; +// static jthread the_sample_thread = NULL; +// static bool sampler_enabled = false; + +static jlong next_object_tag = 0; // read and inc with next_tag() + + +const char *IFn = "clojure/lang/IFn"; + +jclass ifn(JNIEnv* env) { + auto ifn = (env)->FindClass(IFn); + if (ifn == NULL) { + printf("Ifn not found\n"); + }else { + // printf("IFn found \n"); + } + return ifn; +} + +const char *invoke_sig = "(Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"; + +jmethodID invoke_method_id(JNIEnv* env) { + auto klass = ifn(env); + auto invoke = (env)->GetMethodID(klass, "invoke", invoke_sig); + if (invoke == NULL) { + printf("invoke method not found\n"); + } else { + // printf("invoke method found\n"); + } + return invoke; +} + +jmethodID class_invoke_method_id(JNIEnv* env, jclass klass) { + auto invoke = (env)->GetMethodID(klass, "invoke", invoke_sig); + if (invoke == NULL) { + printf("invoke method not found\n"); + } else { + // printf("invoke method found\n"); + } + return invoke; +} + +jlong next_tag() { + auto monitor = std::make_unique(tag_lock); + return next_object_tag++; +} + +static char const *no_file_name = "NO_SOURCC"; + +// Structure used to record allocations +struct alloc_rec { + std::string obj_class; + jlong obj_size; + + std::string call_class; + std::string call_method; + std::string call_file; + jlong call_line; + + std::string alloc_class; + std::string alloc_method; + std::string alloc_file; + jlong alloc_line; + + jlong thread_id; + jlong freed; + + jlong tag; + bool disable_marker; + + alloc_rec(char const * obj_class, + jlong obj_size, + char const * call_class, + char const * call_method, + char const * call_file, + jlong call_line, + char const * alloc_class, + char const * alloc_method, + char const * alloc_file, + jlong alloc_line, + jlong thread_id, + jlong tag) + : obj_class(obj_class), + obj_size(obj_size), + call_class(call_class), + call_method(call_method), + call_file(call_file == NULL ? 
no_file_name : call_file), + call_line(call_line), + alloc_class(alloc_class), + alloc_method(alloc_method), + alloc_file(alloc_file == NULL ? no_file_name : alloc_file), + alloc_line(alloc_line), + thread_id(thread_id), + tag(tag), + freed(false), + disable_marker(false) + { } +}; + +typedef std::vector> allocs_t; +typedef std::map allocs_by_tag_t; +static allocs_t allocs; +static auto allocs_by_tag = allocs_by_tag_t(); + +#define MAX_FRAMES 1024 + +auto calling_frame(jvmtiEnv* jvmti, + JNIEnv* env, + jvmtiFrameInfo* frames, + jint num_frames) { + jint framei = 0; + auto class_name = allocated(); + + for (; framei < num_frames; framei++) { + auto declaring_class = local_ref(env); + auto err = jvmti->GetMethodDeclaringClass(frames[framei].method, + &declaring_class); + if (err!=JVMTI_ERROR_NONE) { + printf("Error gettimg declaring class: %d\n", err); + } else { + err = jvmti->GetClassSignature(declaring_class, + &class_name, + NULL); + if (err!=JVMTI_ERROR_NONE) { + printf("Error gettimg declaring class name: %d\n", err); + } else { + // printf("class : %d %s\n", framei, (char*)class_name); + // TODO make the filters configurable + if (strncmp(class_name, "Ljava/", 6)!=0 + && strncmp(class_name, "Lcom/sun/", 9) != 0 + && strncmp(class_name, "Ljdk/", 5) != 0 + && strncmp(class_name, "Ljavax/", 7)!=0 + && strncmp(class_name, "Lsun/management", 15) != 0 + && strncmp(class_name, "Lclojure/", 9) != 0 + ) { + break; + } + } + } + } + if (framei>=num_frames) { + framei = 0; + } + return std::make_tuple(framei, std::move(class_name)); +} + + +auto frame_detail(jvmtiEnv* jvmti, JNIEnv* env, jvmtiFrameInfo& frame) { + auto declaring_class = local_ref(env); + auto err = jvmti->GetMethodDeclaringClass(frame.method, &declaring_class); + + auto class_name = allocated(); + err = jvmti->GetClassSignature(declaring_class, + &class_name, + NULL); + + auto method_name = allocated(); + err = (jvmti)->GetMethodName(frame.method, + &method_name, + NULL, + NULL); + + jint entry_count; + auto line_table = allocated(); + + auto error = (jvmti)->GetLineNumberTable(frame.method, + &entry_count, + &line_table); + jint line_num; + + if (error == JVMTI_ERROR_NONE) { + line_num = line_table[0].line_number; + for ( auto i = 1 ; i < entry_count ; i++ ) { + if ( frame.location < line_table[i].start_location) { + break; + } + line_num = line_table[i].line_number; + } + } else { + line_num = -1; + } + + auto source_name = allocated(); + err = jvmti->GetSourceFileName(declaring_class, &source_name); + if (err == JVMTI_ERROR_ABSENT_INFORMATION) { + // do nothing - should have source_name == 0 + } else if (err!=JVMTI_ERROR_NONE) { + printf("Failed to get source file name: %d\n", err); + } + + return std::make_tuple(std::move(class_name), + std::move(method_name), + std::move(source_name), + line_num); +} + +auto allocation_record(jvmtiEnv* jvmti, + JNIEnv* env, + const char* class_sig, + jlong size, + jthread thread, + jint num_frames, + jvmtiFrameInfo* frames) { + + + jint framei=0; + + auto [f0_class_name, f0_method, f0_source, f0_line ] + = frame_detail(jvmti, env, frames[0]); + + + auto cf = calling_frame(jvmti, env, frames, num_frames); + framei = std::get<0>(cf); + + auto [fi_class_name, fi_method, fi_source, fi_line ] + = frame_detail(jvmti, env, frames[framei]); + + jlong tid = env->CallLongMethod(thread, thread_getId_method); + + return std::make_unique(class_sig, + size, + fi_class_name, + fi_method, + fi_source, + static_cast(fi_line), + f0_class_name, + f0_method, + f0_source, + static_cast(f0_line), + tid, + 
next_tag()); +} + +bool is_allocs_empty() { + auto monitor = std::make_unique(sample_lock); + return allocs.empty(); +} + +void JNICALL SampledObjectAlloc(jvmtiEnv* jvmti, + JNIEnv* env, + jthread thread, + jobject object, + jclass object_klass, + jlong size) { + + auto class_sig = allocated(); + { + auto err = jvmti->GetClassSignature(object_klass, &class_sig, NULL); + if ( err != 0) { + printf("Failed to get class name\n" ); + return; + } + } + + if (is_allocs_empty() && agent_state == allocation_tracing_starting) { + if (0 == std::strcmp(class_sig, allocation_start_marker)) { + // set the state to allow the sampler to know that we have + // actually activated + set_state(env, allocation_tracing_active); + } + } else { + + jvmtiFrameInfo frames[MAX_FRAMES]; + + jint count; + { + auto err = jvmti->GetStackTrace(thread, 0, MAX_FRAMES, frames, &count); + if (err != 0) { + printf("Failed to get stack\n"); + return; + } + } + + auto rec = + allocation_record(jvmti, env, class_sig, size, thread, count, frames); + + // TODO use the alloc_rec pointer as the tag? + jvmti->SetTag(object, rec->tag); + + if (agent_state == allocation_tracing_stopping + && + (0 == std::strcmp(class_sig, allocation_finish_marker))) { + + jvmti->SetEventNotificationMode(JVMTI_DISABLE, + JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + NULL); + rec->disable_marker = true; + set_state(env, allocation_tracing_flushing); + } + + auto monitor = std::make_unique(sample_lock); + allocs_by_tag.emplace(rec->tag, rec.get()); + allocs.push_back(std::move(rec)); + } +} + +// void turn_off_allocation_tracing(jvmtiEnv* jvmti, +// JNIEnv* env, +// jthread thread, +// jobject object) { +// jvmti->SetEventNotificationMode(JVMTI_DISABLE, +// JVMTI_EVENT_OBJECT_FREE, +// NULL); +// auto monitor = std::make_unique(sample_lock); + +// // Remove tags +// { +// // printf("remove tags\n"); +// auto tags=std::vector(allocs.size()); +// std::transform(allocs.begin(), +// allocs.end(), +// std::back_inserter(tags), +// std::mem_fn(&alloc_rec::tag)); + +// // printf("found %ld tags\n", tags.size()); + +// if (tags.size()>0) { +// // auto count = allocated(); +// // auto objects = allocated(); +// // auto object_tags = allocated(); +// jint count; +// jobject* objects; +// jlong* object_tags; +// auto err = jvmti->GetObjectsWithTags(tags.size(), +// tags.data(), +// &count, +// &objects, +// &object_tags); +// if (err!= JVMTI_ERROR_NONE) { +// printf("problem %d", err); +// } +// // printf("found %d objects to untag\n", count); +// for (jint i=0; i< count; ++i) { +// jvmti->SetTag(objects[i], NULL); +// } + +// // TODO unreference the objects +// } +// // printf("remove tags done\n"); +// } + +// allocs_by_tag.clear(); +// // printf("Disabled\n"); +// } + +auto all_tags(allocs_t& allocs) { + auto tags=std::vector(allocs.size()); + std::transform(allocs.begin(), + allocs.end(), + std::back_inserter(tags), + std::mem_fn(&alloc_rec::tag)); + return tags; +} + +void untag_objects(jvmtiEnv* jvmti) { + // auto monitor = std::make_unique(sample_lock); + auto tags = all_tags(allocs); + if (tags.size()>0) { + jint count; + auto objects = allocated(); + auto object_tags = allocated(); + // jint count; + // jobject* objects; + // jlong* object_tags; + auto err = jvmti->GetObjectsWithTags(tags.size(), + tags.data(), + &count, + &objects, + &object_tags); + if (err!= JVMTI_ERROR_NONE) { + printf("problem %d", err); + } + // printf("found %d objects to untag\n", count); + for (jint i=0; i< count; ++i) { + jvmti->SetTag(objects[i], 0); + } + + // TODO unreference the objects 
+ } + // printf("remove tags done\n"); + + allocs_by_tag.clear(); + // printf("Disabled\n"); +} + + +void JNICALL +ObjectFree(jvmtiEnv *jvmti, jlong tag) { + auto monitor = std::make_unique(sample_lock); + try { + alloc_rec* rec = allocs_by_tag.at(tag); + rec->freed = true; + if (agent_state == allocation_tracing_flushing && rec->disable_marker) { + jvmti->SetEventNotificationMode(JVMTI_DISABLE, + JVMTI_EVENT_OBJECT_FREE, + NULL); + set_state(allocation_tracing_flushed); + } + + } catch(const std::out_of_range&) { + } +} + +namespace java { + local_ref string(JNIEnv* env, const char* s) { + return mk_local_ref(env, (env)->NewStringUTF(s)); + } + local_ref string(JNIEnv* env, const std::string& s) { + return mk_local_ref(env, (env)->NewStringUTF(s.c_str())); + } +} + +void allocation_tracing_report(JNIEnv* env) { + + auto monitor = std::make_unique(sample_lock); + + for (auto& alloc : allocs) { + auto class_jstr = java::string(env, alloc->obj_class); + + auto alloc_class_jstr = java::string(env, alloc->alloc_class); + auto alloc_method_jstr = java::string(env, alloc->alloc_method); + auto alloc_file_jstr = java::string(env, alloc->alloc_file); + + auto call_class_jstr = java::string(env, alloc->call_class); + auto call_method_jstr = java::string(env, alloc->call_method); + auto call_file_jstr = java::string(env, alloc->call_file); + + /* printf("calling constructor %ld %ld %ld %ld %ld\n", */ + /* alloc->obj_size, */ + /* alloc->call_line, */ + /* alloc->alloc_line, */ + /* alloc->thread_id, */ + /* alloc->freed */ + /* ); */ + /* printf("calling constructor %s %s %s\n", */ + /* alloc->alloc_class.c_str(), */ + /* alloc->alloc_method.c_str(), */ + /* alloc->alloc_file.c_str() */ + /* ); */ + /* printf("calling constructor %s %s %s\n", */ + /* alloc->call_class.c_str(), */ + /* alloc->call_method.c_str(), */ + /* alloc->call_file.c_str() */ + /* ); */ + /* printf("class %ld\n", */ + /* (long*)(static_cast(*agent_allocation_class))); */ + + local_ref rec = + local_ref(env, + env->NewObject(*agent_allocation_class, + agent_allocation_ctor, + (jstring)class_jstr, + alloc->obj_size, + (jstring)call_class_jstr, + (jstring)call_method_jstr, + (jstring)call_file_jstr, + alloc->call_line, + (jstring)alloc_class_jstr, + (jstring)alloc_method_jstr, + (jstring)alloc_file_jstr, + alloc->alloc_line, + alloc->thread_id, + alloc->freed)); + + /* printf("calling data 1\n"); */ + env->CallStaticVoidMethod(*agent_class, + agent_data1_method, + (jobject&)rec); + } + + untag_objects(jvmti); + allocs.clear(); +} + +static char* terminate_string(char* class_name) { + class_name[strlen(class_name) - 1] = 0; + return class_name + 1; +} +static std::string allocation_sampler_name("Lcriterium/agent/core/AllocationSampler"); + + +void watch_field(jvmtiEnv* jvmti, + JNIEnv* env, + jthread thread, + jclass klass, + const char* field_name, + const char* field_sig=NULL) { + jfieldID field_id = (env)->GetFieldID(klass, field_name, field_sig); + if (field_id != NULL) { + // printf("found field\n"); + } else { + printf("field not found\n"); + } + + auto err = (jvmti)->SetFieldModificationWatch(klass, field_id); +} + + +void enable_allocation_tracing(JNIEnv* env) { + // printf("Enable allocation tracing\n"); + set_state(env, allocation_tracing_starting); + + { + auto monitor = std::make_unique(sample_lock); + allocs.clear(); + allocs_by_tag.clear(); + } + + jvmti->SetHeapSamplingInterval(0); + + auto err = jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + NULL); + if (err != JVMTI_ERROR_NONE) 
{ + printf("Failed to enable allocation sampling %d\n", err); + } + err = jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_OBJECT_FREE, + NULL); + if (err != JVMTI_ERROR_NONE) { + printf("Failed to enable objrct free notifivations %d\n", err); + } + + // printf("Enabled\n"); +} + +void disable_allocation_tracing(JNIEnv* env) { + set_state(env, allocation_tracing_stopping); +} + +void JNICALL FieldModification(jvmtiEnv* jvmti, + JNIEnv* env, + jthread thread, + jmethodID method, + jlocation location, + jclass field_klass, + jobject object, + jfieldID field, + char signature_type, + jvalue new_value) { + // printf("\nField modification\n"); + // bool value = new_value.z; + // // printf("got modified field value %d\n", value); + + // if (!!value) { + // enable_allocation_tracing(env, thread, object); + // } else { + // disable_allocation_tracing(jvmti, env, thread, object); + // } +} + +void JNICALL ClassLoad(jvmtiEnv* jvmti, + JNIEnv* env, + jthread thread, + jclass klass) { + char *className; + int err = (jvmti)->GetClassSignature(klass, &className, NULL); + if (className != NULL) { + terminate_string(className); + if (allocation_sampler_name == className) { + // printf("\nFound allocation sampler\n"); + watch_field(jvmti, env, thread, klass, "enabled", "Z"); + } + } +} +enum Commands : jlong { + ping, + sync_state, + start_allocation_tracing = 10, + stop_allocation_tracing = 11, + report_allocation_tracing = 12 +}; + + +void JNICALL Agent_command(JNIEnv* env, jclass klass, jlong cmd) { + // Dispatch commands from the Agent class. + // printf("received command %ld in state %ld\n", cmd, agent_state); + + + if (ping == cmd) { + env->CallStaticVoidMethod(*agent_class, + agent_data1_method, + env->NewStringUTF("Alive")); + } else if (sync_state == cmd) { + set_state(env, agent_state); + } else if (start_allocation_tracing == cmd) { + enable_allocation_tracing(env); + } else if (stop_allocation_tracing == cmd) { + disable_allocation_tracing(env); + } else if (report_allocation_tracing == cmd) { + allocation_tracing_report(env); + } else { + printf("Received unknown command: %ld\n", cmd); + } +} + + +void JNICALL VMInit(jvmtiEnv* jvmti, JNIEnv* env, jthread thread) { + auto monitor = std::make_unique(sample_lock); + + jclass thread_klass = env->GetObjectClass(thread); + thread_getId_method = env->GetMethodID(thread_klass, "getId", "()J"); + + auto klass = mk_local_ref(env, env->FindClass("criterium/agent/Agent")); + if (klass == NULL) { + printf("Failed to find Agent class\n"); + return; + } + + auto allocation_start_marker_klass = + mk_local_ref(env, env->FindClass(allocation_start_marker)); + if (allocation_start_marker_klass == NULL) { + printf("Failed to find Agent$AllocationStartMarker class\n"); + return; + } + + auto allocation_finish_marker_klass = + mk_local_ref(env, env->FindClass(allocation_finish_marker)); + if (allocation_finish_marker_klass == NULL) { + printf("Failed to find Agent$AllocationFinishMarker class\n"); + return; + } + + auto allocation_klass = + mk_local_ref(env, env->FindClass(allocation_class_name)); + if (allocation_klass == NULL) { + printf("Failed to find Allocation class\n"); + return; + } + + static JNINativeMethod registry[1] = { + {const_cast("command"), + const_cast("(J)V"), + (void*)Agent_command} + }; + auto err = env->RegisterNatives(klass, registry, 1); + if (err != JVMTI_ERROR_NONE ) { + printf("Registration of native methods on Agent failed %d\n", err); + return; + } + + auto data1_method = + env->GetStaticMethodID(klass, "data1", 
"(Ljava/lang/Object;)V"); + if (data1_method == NULL ) { + printf("failed to find Agent.data1 method\n"); + return; + } + auto data8_method = + env->GetStaticMethodID(klass, "data8", "(Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;Ljava/lang/Object;)V"); + if (data8_method == NULL ) { + printf("failed to find Agent.data8 method\n"); + return; + } + + auto state_field = env->GetStaticFieldID(klass, "state", "J"); + if (state_field == NULL ) { + printf("failed to find Agent.state field\n"); + return; + } + + agent_class = std::make_unique>(env, klass); + agent_allocation_start_marker_class = + std::make_unique>(env, allocation_start_marker_klass); + agent_allocation_finish_marker_class = + std::make_unique>(env, allocation_finish_marker_klass); + agent_allocation_class = + std::make_unique>(env, allocation_klass); + + /* assert((jclass)allocation_klass == NULL); */ + /* assert(allocation_klass == NULL); */ + + agent_allocation_ctor = env->GetMethodID(*agent_allocation_class, + "", + agent_allocation_class_args); + if (agent_allocation_ctor == NULL) { + printf("Failed to get Allocation constructor\n"); + } + + agent_data1_method = data1_method; + agent_data8_method = data8_method; + agent_state_field = state_field; + + // initialise the state field + set_state(env, passive); +} + +void JNICALL VMDeath(jvmtiEnv* jvmti, JNIEnv* env) { + vm_dead = true; +} + +void parse_options(jvmtiEnv* jvmti, char* options) { + if (options != NULL && options[0] >= '0' && options[0] <= '9') { + jvmti->SetHeapSamplingInterval(std::atoi(options)); + } +} + +JNIEXPORT jint JNICALL +Agent_OnLoad(JavaVM* vm, char* options, void* reserved) { + printf("Loading criterium agent\n"); + + vm->GetEnv((void**) &jvmti, JVMTI_VERSION_1_0); + + jvmti->CreateRawMonitor("sample_lock", &sample_lock); + jvmti->CreateRawMonitor("tag_lock", &tag_lock); + + jvmtiCapabilities capabilities = {0}; + capabilities.can_generate_sampled_object_alloc_events = 1; + capabilities.can_generate_field_modification_events = 1; + capabilities.can_get_line_numbers = 1; + capabilities.can_get_source_file_name = 1; + capabilities.can_tag_objects = 1; + capabilities.can_generate_object_free_events = 1; + jvmti->AddCapabilities(&capabilities); + + jvmtiEventCallbacks callbacks = {0}; + callbacks.SampledObjectAlloc = SampledObjectAlloc; + // callbacks.ClassLoad = ClassLoad; + callbacks.ObjectFree = ObjectFree; + callbacks.FieldModification = FieldModification; + callbacks.VMInit = VMInit; + callbacks.VMDeath = VMDeath; + jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks)); + // jvmti->SetEventNotificationMode(JVMTI_ENABLE, + // JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, + // NULL); + // jvmti->SetEventNotificationMode(JVMTI_ENABLE, + // JVMTI_EVENT_CLASS_LOAD, + // NULL); + jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_FIELD_MODIFICATION, + NULL); + + jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_VM_INIT, + NULL); + jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_VM_DEATH, + NULL); + + jvmti->SetHeapSamplingInterval(0); + + parse_options(jvmti, options); + // printf("\nallocation sampler loaded\n"); + + return JNI_OK; +} + +JNIEXPORT jint JNICALL +Agent_OnAttach(JavaVM* vm, char* options, void* reserved) { + if (jvmti != NULL) { + return 0; + } + return Agent_OnLoad(vm, options, reserved); +} + +JNIEXPORT void JNICALL Java_criterium_agent_sayHello(JNIEnv *) { + printf("hello\n"); +} diff --git a/agent-cpp/compile_commands.json 
b/agent-cpp/compile_commands.json new file mode 100644 index 0000000..c979903 --- /dev/null +++ b/agent-cpp/compile_commands.json @@ -0,0 +1,16 @@ +[ + { + "arguments": [ + "/usr/bin/clang", + "-c", + "-std=c++17", + "-I.", + "-I/Library/Java/JavaVirtualMachines/jdk-11.0.12.jdk/Contents/Home/include", + "-I/Library/Java/JavaVirtualMachines/jdk-11.0.12.jdk/Contents/Home/include/darwin", + "-oliballocsampler.dylib", + "sampler.cpp" + ], + "directory": "/Users/duncan/projects/hugoduncan/criterium/allocation-sampler", + "file": "/Users/duncan/projects/hugoduncan/criterium/allocation-sampler/sampler.cpp" + } +] diff --git a/bases/agent/deps.edn b/bases/agent/deps.edn new file mode 100644 index 0000000..1a19877 --- /dev/null +++ b/bases/agent/deps.edn @@ -0,0 +1,23 @@ +{:paths ["src/clj" "target/classes" "resources"] + :java-paths ["src/java"] + :deps {} + :deps/prep-lib {:ensure "target/classes/criterium/agent" + :alias :build + :fn javac} + :aliases {:test + {:extra-paths ["test"] + :extra-deps + {org.clojure/clojure {:mvn/version "1.12.0"} + polylith-kaocha/kaocha-wrapper + {:git/url "https://github.com/imrekoszo/polylith-kaocha" + :git/tag "v0.8.4" + :git/sha "f096de8" + :deps/root "projects/kaocha-wrapper"}}} + :build {:deps + {io.github.hugoduncan/makejack + #_ {:local/root "../makejack/projects/makejack-jar"} + {:git/sha "6966696979c9b9c0c97e54f82a5d4096e4105240" + :deps/root "projects/makejack-jar"}} + :ns-default makejack.tasks + :exec-fn help + :jvm-opts ["-Dclojure.main.report=stderr"]}} } diff --git a/bases/agent/resources/agent/.keep b/bases/agent/resources/agent/.keep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bases/agent/resources/agent/.keep @@ -0,0 +1 @@ + diff --git a/bases/agent/src/clj/criterium/agent.clj b/bases/agent/src/clj/criterium/agent.clj new file mode 100644 index 0000000..901fa23 --- /dev/null +++ b/bases/agent/src/clj/criterium/agent.clj @@ -0,0 +1,93 @@ +(ns criterium.agent + "Interface to the Criterium native agent for allocation tracking and profiling. + + This namespace provides functions for tracking JVM heap allocations and garbage + collection during benchmark execution. It uses a native agent to capture detailed + allocation information with minimal overhead. + + The key concepts are: + - Native Agent: A JVM agent that hooks into allocation events + - Allocation Records: Detailed data about each object allocation + - Thread Filtering: Ability to focus on allocations from specific threads + + Example usage: + + ```clojure + (let [[allocations result] (with-allocation-tracing + (your-code-here))] + ;; Filter to current thread + (let [thread-allocs (filter (allocation-on-thread?) allocations)] + (allocations-summary thread-allocs))) + ``` + + Note: The agent must be loaded via JVM args for allocation tracking to work." + (:require + [criterium.agent.core :as core] + [criterium.jvm :as jvm])) + +(defn attached? + "Predicate for whether the criterium native agent is properly attached. + + Returns true if the agent was successfully loaded and initialized by the JVM, + false otherwise. The agent must be attached for allocation tracking to work." + [] + (core/attached?)) + +(defmacro with-allocation-tracing + "Creates a scope in which all JVM heap allocations and releases are tracked. 
+ + Returns a vector of [allocation-records result] where: + - allocation-records: A sequence of maps containing detailed allocation data: + {:object-type - Class of allocated object + :object_size - Size in bytes + :call-class - Class that triggered allocation + :call-method - Method that triggered allocation + :call-file - Source file of allocation + :call-line - Line number of allocation + :alloc-class - Class doing allocation + :alloc-method - Method doing allocation + :alloc-file - Source file of allocator + :alloc-line - Line number of allocator + :thread - Thread ID of allocation + :freed - Whether object was freed} + - result: The value returned by the body forms + + Note that the allocations tracked are not limited to the current + thread. Filter the returned records with `allocation-on-thread?` if that + is all you are concerned with." + [& body] + (if (attached?) + (core/with-allocation-tracing-enabled body) + (core/with-allocation-tracing-disabled body))) + +(defn allocation-on-thread? + "Returns a predicate function for filtering allocation records by thread. + + The returned function takes an allocation record and returns true if the + allocation occurred on the specified thread. When called with no arguments, + uses the current thread's ID. + + Useful for composing with filter/remove to analyze allocations by thread." + ([] (core/allocation-on-thread? (jvm/current-thread-id))) + ([thread-id] (core/allocation-on-thread? thread-id))) + +(defn allocation-freed? + "Predicate that returns true if the allocation record indicates the object was freed. + + An object is considered freed when it has been garbage collected during the + allocation tracking session. This helps identify temporary allocations vs + retained objects." + [record] (core/allocation-freed? record)) + +(defn allocations-summary + "Returns a summary of allocation statistics for the given records. + + Takes a sequence of allocation records and returns a map with: + {:num-allocated - Total number of objects allocated + :num-freed - Number of allocated objects that were freed + :allocated-bytes - Total bytes allocated + :freed-bytes - Total bytes from freed objects} + + Useful for getting high-level metrics from allocation tracking results." + [records] + (core/allocations-summary records)) diff --git a/bases/agent/src/clj/criterium/agent/core.clj b/bases/agent/src/clj/criterium/agent/core.clj new file mode 100644 index 0000000..ac41ff6 --- /dev/null +++ b/bases/agent/src/clj/criterium/agent/core.clj @@ -0,0 +1,411 @@ +(ns criterium.agent.core + "Low-level interface to the Criterium native agent for allocation tracking. + + This namespace provides the core implementation for interacting with the + agent that tracks JVM heap allocations. It manages agent state, handles + allocation records, and provides primitives for the high-level API. + + Key Components: + - Native Agent Commands: Protocol for controlling agent behavior + - State Management: Track and validate agent state transitions + - Allocation Recording: Capture and store allocation events + - Data Processing: Transform raw allocation data into usable records + + Implementation Notes: + - Uses JNI bindings to communicate with the native agent + - Manages thread-local and global agent state + - Optimized for minimal allocation overhead during tracing + - Handles concurrent access to shared state + + Warning: This is an internal implementation namespace. Most users should + use criterium.agent instead."
+ (:import + [com.sun.tools.attach + VirtualMachine] + [criterium.agent + Agent] + [java.lang.management + ManagementFactory])) + + +;;; Native Agent + +(def ^:internal records + "Atom containing the current set of allocation records. + + Records are captured during allocation tracing and stored here until + retrieved. The atom is cleared at the start of each tracing session. + + Structure: + Vector of maps, each containing allocation details like: + - :object-type - Class of allocated object + - :object_size - Size in bytes + - :call-* - Allocation call site information + - :alloc-* - Allocator information + - :thread - Thread ID + - :freed - GC status" + (atom [])) + +(defn- data-fn + "Callback function invoked by the native agent for allocation events. + + Processes allocation events from the native agent and stores them in + the records atom. Handles both object and primitive allocation + records. + + Implementation Notes: + - Called from native code via JNI + - Must be thread-safe + - Minimizes allocations during processing + - Filters out internal marker allocations + + Called in two forms: + 1. Single object form for complex allocations + 2. Multi-argument form for primitive allocations" + ([object] + (cond + (instance? criterium.agent.Allocation object) + (let [a ^criterium.agent.Allocation object] + (when (and (not= (.call_method a) "allocation_start_marker") + (not= (.call_method a) "allocation_finish_marker")) + (swap! records conj + {:object-type (.object_type a) + :object_size (.object_size a) + :call-class (.call_class a) + :call-method (.call_method a) + :call-file (.call_file a) + :call-line (.call_line a) + :alloc-class (.alloc_class a) + :alloc-method (.alloc_method a) + :alloc-file (.alloc_file a) + :alloc-line (.alloc_line a) + :thread (.thread a) + :freed (.freed a)}))) + :else + (prn :received object (type object)))) + ([a b c d e f g h] + (when (and (not= c "allocation_start_marker") + (not= c "allocation_finish_marker")) + (swap! records conj + {:object-type a + :call-class b + :call-method c + :file d + :size (Long/parseLong e) + :thread (Long/parseLong f) + :line (Long/parseLong g) + :freed (Long/parseLong h)})))) + +(Agent/set_handler data-fn); + +(def ^:private commands + "Map of command keywords to their numeric protocol values. + + Commands control agent behavior: + - :ping - Check agent responsiveness + - :sync-state - Synchronize agent state + - :start-allocation-tracing - Begin allocation tracking + - :stop-allocation-tracing - End allocation tracking + - :report-allocation-tracing - Retrieve allocation data + + Values correspond to the native agent protocol constants." + {:ping 0 + :sync-state 1 + :start-allocation-tracing 10 + :stop-allocation-tracing 11 + :report-allocation-tracing 12}) + +(def ^:private states + "Map of numeric state codes to their keyword representations. + + Agent States and Transitions: + :not-attached (-1) - Agent not loaded or initialized + :passive (0) - Agent loaded but inactive + :allocation-tracing-starting (10) -> :allocation-tracing-active + :allocation-tracing-active (11) - Collecting allocation data + :allocation-tracing-stopping (15) -> :allocation-tracing-flushing + :allocation-tracing-flushing (16) -> :allocation-tracing-flushed + :allocation-tracing-flushed (17) - Data ready for collection + + State transitions are managed by agent commands." 
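  ;; Illustrative command flow (an assumption: the native agent is attached);
  ;; it shows which agent-command calls drive the transitions documented above:
  ;;   (agent-command :start-allocation-tracing)  ; :passive -> :allocation-tracing-starting -> :allocation-tracing-active
  ;;   (agent-command :stop-allocation-tracing)   ; -> :allocation-tracing-stopping -> :allocation-tracing-flushing -> :allocation-tracing-flushed
  ;;   (agent-command :report-allocation-tracing) ; agent replays the collected records via Agent/data1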
+ {-1 :not-attached + 0 :passive + 10 :allocation-tracing-starting + 11 :allocation-tracing-active + 15 :allocation-tracing-stopping + 16 :allocation-tracing-flushing + 17 :allocation-tracing-flushed}) + +(defn ^:internal agent-command + "Send a command to the native agent. + + Commands are sent via JNI and may block until the agent responds. + See commands map for valid command values. + + Implementation Notes: + - Thread-safe but may synchronize on agent state + - May trigger state transitions + - Command acknowledgement is synchronous" + [cmd] + (Agent/command (commands cmd))) + +;; (let [klass Agent +;; handle (.findStatic +;; (java.lang.invoke.MethodHandles/publicLookup) +;; Agent +;; "getState" +;; ;; Long/TYPE +;; (java.lang.invoke.MethodType/fromMethodDescriptorString +;; "()J" +;; (.getContextClassLoader (Thread/currentThread))) +;; ) +;; args (make-array Object 0)] + +;; (def h handle) + +;; (let [v ^long (.invoke ^java.lang.invoke.DirectMethodHandle$StaticAccessor +;; h nil)] +;; v) + +;; (defn ^:internal agent-state [] +;; (prn :val (.invoke handle args)) +;; (states +;; (.invoke handle args) +;; ;;(. state klass) +;; ))) + +;; (let [agent Agent] +;; (defn ^:internal agent-state [] +;; (states (. agent getState) +;; ;;(. state klass) +;; ))) + +;; Direct linking is used here to avoid var lookups, which can cause garbage +;; which we want to avoid in the sample collection path. +(binding [*compiler-options* (assoc *compiler-options* :direct-linking true)] + ;; Direct-linked implementation to minimize allocation overhead + + (defn ^:internal agent-state** + "Get raw numeric state from native agent instance. + + Performance critical path - uses type hints and direct linking. + Returns the raw state value for translation by agent-state. + + Implementation Notes: + - Type hinted for performance + - Direct linked to avoid var lookup + - Thread-safe but uncoordinated" + ^long [agent] + (. ^Agent agent getState) + ;;(. state klass) + ) + + (def agent-state* (partial agent-state** (new Agent))) + + (defn ^:internal agent-state [] + (get states (agent-state*))) + + (comment + (agent-command :ping)) + + (dotimes [_ 1000] (agent-state)) + + (defn allocation-start-marker + "Create a marker allocation to track start of allocation sequence. + + Used to synchronize the start of allocation tracking by creating a + recognizable allocation pattern. + + Implementation Notes: + - Creates a specific allocation pattern + - Filtered from final results + - Used for state transition timing" + [] + (Agent/allocation_start_marker)) + + (defn allocation-tracing-active? + "Test if allocation tracing is currently active. + + Returns true only when the agent is in the :allocation-tracing-active state + and fully initialized. + + Implementation Notes: + - Thread-safe state check + - Used to verify tracing preconditions + - Optimized for frequent checking" + [] + (= (agent-state) :allocation-tracing-active)) + + (defn attached? + "Test if the native agent is properly attached to the JVM. + + Returns true if the agent is loaded and initialized, false otherwise. + This is a prerequisite for any allocation tracking operations. + + Implementation Notes: + - Thread-safe state check + - Does not modify agent state + - Used to guard tracing operations" + [] + (not= (agent-state) :not-attached)) + + (defn ^:internal allocation-tracing-start! + "Initialize and start allocation tracing. + + Sequence: + 1. Send start command to agent + 2. Wait for agent state transition + 3. Force GC to clear existing allocations + 4. 
Create start marker and verify state + + Implementation Notes: + - Blocks until tracing is active + - Creates synchronization allocations + - May timeout if agent doesn't respond + - Thread-safe but should not be called concurrently" + [] + (agent-command :start-allocation-tracing) + (assert (= (agent-state) :allocation-tracing-starting)) + ;; (make-array Object (* 512 1024)) ; flush this + (System/gc) + (System/gc) + (System/gc) + (allocation-start-marker) + (loop [i 1000000] + (allocation-start-marker) + (when (and (pos? i) (not (allocation-tracing-active?))) + (recur (unchecked-dec i)))) + (when (not= (agent-state) :allocation-tracing-active) + (println "Warning allocation tracing failed to start promptly"))) + + (defn ^:internal allocation-tracing-stop! + "Stop allocation tracing and collect final results. + + Sequence: + 1. Send stop command to agent + 2. Create finish marker allocation + 3. Force GC to flush remaining allocations + 4. Wait for agent to finish processing + + Implementation Notes: + - Blocks until processing complete + - Creates marker allocations + - Thread-safe but should not be called concurrently + - May timeout if agent doesn't respond" + [] + (agent-command :stop-allocation-tracing) + (Agent/allocation_finish_marker) + ;; (Agent/allocation_marker) + (System/gc) + (loop [i 1000000] + (when (and (pos? i) + (do + (agent-command :sync-state) + (not= (agent-state) :allocation-tracing-flushed))) + ;; (when (zero? ^long (mod i 100)) + ;; (System/gc)) + (Agent/allocation_finish_marker) + (System/gc) + (recur (unchecked-dec i)))) + (when (not= (agent-state) :allocation-tracing-flushed) + (println "Warning allocation tracing failed to stop promptly")))) + +(defn with-allocation-tracing-enabled [body] + `(let [res# (try + (allocation-tracing-start!) + ~@body + (finally + (allocation-tracing-stop!)))] + (reset! records []) + (agent-command :report-allocation-tracing) + [@records res#])) + +(defn with-allocation-tracing-disabled [body] + `[nil (do ~@body)]) + +;; (trace-allocation) + +(defn allocation-on-thread? + "Returns a predicate function for filtering allocation records by thread. + + The returned function takes an allocation record and returns true if the + allocation occurred on the specified thread. Useful for filtering allocation + records to analyze per-thread behavior. + + Parameters: + thread-id - Thread ID to match against allocation records + + Returns a function that takes an allocation record and returns true if + the record's thread matches the specified thread-id." + [thread-id] + (fn allocation-on-thread? + [record] + (= thread-id (:thread record)))) + +(defn allocation-freed? + "Predicate that returns true if the allocation record indicates the object was freed. + + An object is considered freed when it has been garbage collected during the + allocation tracking session. This helps identify temporary allocations vs + retained objects. + + Parameters: + record - The allocation record to check + + Returns true if the record indicates the object was freed during tracking." + [record] + (pos? (long (:freed record)))) + +(defn allocations-summary + "Returns a summary of allocation statistics for the given records. 
+ + Takes a sequence of allocation records and returns a map with: + {:num-allocated - Total number of objects allocated + :num-freed - Number of allocated objects that were freed + :allocated-bytes - Total bytes allocated + :freed-bytes - Total bytes from freed objects} + + Parameters: + records - Sequence of allocation records to summarize + + Returns a map containing allocation statistics." + [records] + (let [freed (filterv allocation-freed? records)] + {:num-allocated (count records) + :num-freed (count freed) + :allocated-bytes (reduce + (map :object_size records)) + :freed-bytes (reduce + (map :object_size freed))})) + +;;; Object Size Agent + +(comment + ;; TODO re-enable these + (defn pid + "Return the PID of the current JVM. + This may not work on all JVM/OS instances." + [] + (re-find #"\d+" (.getName (ManagementFactory/getRuntimeMXBean)))) + + (defn- load-agent* + "Attach the javaagent from a jar file to the current JVM." + [^String jar-path] + (.loadAgent (VirtualMachine/attach (pid)) jar-path)) + + (defn- find-jar + "Find the agent jar" + ;; TODO use tools.deps.alpha + [] + "criterium-agent.jar") + + #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} + (defn load-agent + "Load the agent" + [] + (load-agent* (find-jar))) + + #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} + (defn object-size + "Return the approximate size of an object in bytes." + ^long [_x] + #_(agent/object-size x) + 0)) diff --git a/bases/agent/src/clj/criterium/agent/instrumentation.clj b/bases/agent/src/clj/criterium/agent/instrumentation.clj new file mode 100644 index 0000000..e5c2dc4 --- /dev/null +++ b/bases/agent/src/clj/criterium/agent/instrumentation.clj @@ -0,0 +1,27 @@ +(ns criterium.agent.instrumentation + "A non-native agent to access the Instrumentation interface." + (:import + [java.lang.instrument + Instrumentation]) + (:gen-class + :methods + [^:static [agentmain [String java.lang.instrument.Instrumentation] void]])) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn -premain + "Invoke when attached at startup" + [args _inst] + (println "Loading criterium.agent" args)) + +(defonce ^:private instrument (atom nil)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn -agentmain + "Invoke when attached into running jvm" + [^String _args ^Instrumentation instrumentation] + (swap! instrument (constantly instrumentation))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn object-size + ^long [x] + (.getObjectSize ^Instrumentation @instrument x)) diff --git a/bases/agent/src/java/criterium/agent/Agent.java b/bases/agent/src/java/criterium/agent/Agent.java new file mode 100644 index 0000000..7705f83 --- /dev/null +++ b/bases/agent/src/java/criterium/agent/Agent.java @@ -0,0 +1,56 @@ +package criterium.agent; + +import clojure.lang.IFn; + +// Low level interface to native agent. + +// This is in jaa so that the class is available on VM_START, +// and the agent can start with minimal capabilities and overhead. 
+ +public class Agent { + public static long state = -1; + public static IFn handler; + + // clojure has trouble getting the static field without garbage + public long getState() { + return state; + } + + public static class AllocationStartMarker {}; + public static class AllocationFinishMarker {}; + + public static AllocationStartMarker allocation_start_marker() { + return new AllocationStartMarker(); + } + + public static AllocationFinishMarker allocation_finish_marker() { + return new AllocationFinishMarker(); + } + + public static void set_handler(clojure.lang.IFn handler_fn) { + handler = handler_fn; + } + + public static void here() { + System.out.println("here"); + } + + // commands sent to the agent + public static native void command(long cmd); + + // data sent from the agent + public static void data1(Object object) { + if (handler != null) { + handler.invoke(object); + } else { + System.out.print(object); + } + } + public static void + data8(Object a, Object b, Object c, Object d, Object e, Object f, Object g, + Object h) { + if (handler != null) { + handler.invoke(a, b, c, d, e, f, g, h); + } + } +} diff --git a/bases/agent/src/java/criterium/agent/Allocation.java b/bases/agent/src/java/criterium/agent/Allocation.java new file mode 100644 index 0000000..4dee1a2 --- /dev/null +++ b/bases/agent/src/java/criterium/agent/Allocation.java @@ -0,0 +1,45 @@ +package criterium.agent; + +public class Allocation { + public final String object_type; + public final long object_size; + + public final String call_class; + public final String call_method; + public final String call_file; + public final long call_line; + + public final String alloc_class; + public final String alloc_method; + public final String alloc_file; + public final long alloc_line; + + public final long thread; + public final long freed; + + public Allocation(String object_type, + long object_size, + String call_class, + String call_method, + String call_file, + long call_line, + String alloc_class, + String alloc_method, + String alloc_file, + long alloc_line, + long thread, + long freed) { + this.object_type = object_type; + this.object_size = object_size; + this.call_class = call_class; + this.call_method = call_method; + this.call_file = call_file; + this.call_line = call_line; + this.alloc_class = alloc_class; + this.alloc_method = alloc_method; + this.alloc_file = alloc_file; + this.alloc_line = alloc_line; + this.thread = thread; + this.freed = freed; + } +} diff --git a/bases/agent/test/criterium/agent/core_test.clj b/bases/agent/test/criterium/agent/core_test.clj new file mode 100644 index 0000000..b45b08a --- /dev/null +++ b/bases/agent/test/criterium/agent/core_test.clj @@ -0,0 +1,146 @@ +(ns criterium.agent.core-test + "Tests for the criterium.agent.core namespace. + + Tests cover internal implementation details including: + - State management and transitions + - Command protocol implementation + - Allocation tracking and filtering + - GC interaction and marker handling + + Test organization: + - Unit tests for internal functions + - State transition tests + - Error condition tests + - Thread safety tests + - Performance tests" + (:require + [clojure.test :refer [deftest is testing use-fixtures]] + [criterium.agent.core :as agent-core] + [criterium.jvm :as jvm])) + +;; Test Fixtures + +(defn cleanup-fixture + "Cleanup fixture to ensure consistent test environment." 
+ [f] + (try + (f) + (finally + (System/gc) + (Thread/sleep 100)))) + +(use-fixtures :each cleanup-fixture) + +;; Helper Functions + +(defn with-timeout + "Run body with timeout protection." + [timeout-ms f] + (let [future (future (f))] + (try + (deref future timeout-ms :timeout) + (finally + (future-cancel future))))) + +;; JIT warm up for performance sensitive tests +(dotimes [_ 10000] (agent-core/agent-state)) + +;; Unit Tests + +(deftest agent-state-test + (testing "Basic state access" + (is (keyword? (agent-core/agent-state)) + "Agent state should be a keyword") + (is (contains? #{:not-attached :passive + :allocation-tracing-starting :allocation-tracing-active + :allocation-tracing-stopping :allocation-tracing-flushing + :allocation-tracing-flushed} + (agent-core/agent-state)) + "Agent state should be a valid state keyword")) + + (testing "State caching and updates" + (let [initial-state (agent-core/agent-state) + second-state (agent-core/agent-state)] + (is (= initial-state second-state) + "State should be stable between reads")))) + +(deftest agent-command-test + (testing "Basic command sending" + (is (number? (agent-core/agent-command :ping)) + "Ping command should return a numeric response") + + (is (number? (agent-core/agent-command :sync-state)) + "Sync command should return a numeric response")) + (testing "Invalid commands" + (is (thrown? IllegalArgumentException + (agent-core/agent-command :invalid-command)) + "Invalid commands should throw exceptions"))) + +(deftest allocation-tracking-test + (testing "Start/stop cycle" + (when (agent-core/attached?) + (agent-core/allocation-tracing-start!) + (is (agent-core/allocation-tracing-active?) + "Tracing should be active after start") + + (agent-core/allocation-tracing-stop!) + (is (not (agent-core/allocation-tracing-active?)) + "Tracing should be inactive after stop"))) + + (testing "Marker allocations" + (when (agent-core/attached?) + (agent-core/allocation-tracing-start!) + (agent-core/allocation-start-marker) + (let [records @agent-core/records] + (is (not-any? #(= (:call-method %) "allocation_start_marker") records) + "Marker allocations should be filtered")) + (agent-core/allocation-tracing-stop!)))) + +(deftest allocation-filtering-test + (testing "Thread filtering" + (let [current-thread (jvm/current-thread-id) + other-thread (inc current-thread) + pred (agent-core/allocation-on-thread? current-thread)] + (is (pred {:thread current-thread}) + "Should match current thread") + (is (not (pred {:thread other-thread})) + "Should not match other thread"))) + + (testing "Freed allocation detection" + (is (agent-core/allocation-freed? {:freed 1}) + "Should detect freed allocation") + (is (not (agent-core/allocation-freed? {:freed 0})) + "Should detect non-freed allocation"))) + +;; Integration Tests + +(deftest full-allocation-cycle-test + (testing "Complete allocation tracking cycle" + (when (agent-core/attached?) + (let [result (with-timeout 5000 + #(do + (agent-core/allocation-tracing-start!) + (let [_ (Object.)] + (System/gc) + (Thread/sleep 100) + (agent-core/allocation-tracing-stop!) 
+ @agent-core/records)))] + (is (not= :timeout result) + "Allocation cycle should complete within timeout") + (when-not (= :timeout result) + (is (seq result) + "Should capture allocations") + (is (some #(= (:object-type %) "java.lang.Object") result) + "Should capture Object allocation")))))) + +;; Performance Tests + +(deftest ^:performance state-access-performance + (testing "State access overhead" + ;; Should complete quickly with no allocation + (let [start-time (System/nanoTime) + _results (dotimes [_ 1000000] + (agent-core/agent-state)) + elapsed (/ (- (System/nanoTime) start-time) 1e6)] + (is (< elapsed 1000) + "1M state reads should complete in under 1 second")))) diff --git a/bases/agent/test/criterium/agent_test.clj b/bases/agent/test/criterium/agent_test.clj new file mode 100644 index 0000000..94cbe24 --- /dev/null +++ b/bases/agent/test/criterium/agent_test.clj @@ -0,0 +1,154 @@ +(ns criterium.agent-test + "Tests for the criterium.agent namespace. + + Tests cover four main areas: + 1. Agent Attachment - Verifying agent initialization and status + 2. Allocation Tracking - Testing allocation capture functionality + 3. Thread Filtering - Testing thread-specific allocation filtering + 4. Results Analysis - Testing allocation summary and statistics + + Test organization: + - Unit tests verify individual function behavior + - Integration tests verify interaction between components + - Zero-garbage tests verify allocation behavior + + Note: Some tests require the native agent to be properly attached." + (:require + [clojure.test :refer [deftest is testing use-fixtures]] + [criterium.agent :as agent] + [criterium.jvm :as jvm])) + +;; Test Fixtures + +(defn cleanup-fixture + "Cleanup fixture to ensure consistent test environment." + [f] + (try + (f) + (finally + ;; Force GC to clean up any test allocations + (jvm/run-finalization-and-force-gc!)))) + +(use-fixtures :each cleanup-fixture) + +;; Helper Functions + +(defn make-test-allocation + "Creates a test allocation by constructing a string." + [] + (str "test-allocation-" (rand-int 1000))) + +;; Unit Tests + +(deftest attached?-test + (testing "Agent attachment status" + (is (boolean? (agent/attached?)) + "Should return a boolean indicating attachment status"))) + +(deftest with-allocation-tracing-test + (testing "Basic tracing functionality" + (let [[allocs rv] (agent/with-allocation-tracing 1)] + (is (= 1 rv) + "Should return the result value unchanged") + (when (agent/attached?) + (is (vector? allocs) + "Should return allocations as a vector")))) + + (testing "Tracing with allocations" + (when (agent/attached?) + (let [[allocs rv] (agent/with-allocation-tracing + (make-test-allocation))] + (is (string? rv) + "Should return the created string") + (is (seq allocs) + "Should capture string allocation") + (is (some #(= (:object-type %) "java.lang.String") + allocs) + "Should include string allocation record")))) + + (testing "Nested tracing calls" + (let [[outer-allocs outer-rv] + (agent/with-allocation-tracing + (let [[inner-allocs inner-rv] + (agent/with-allocation-tracing 1)] + (is (= 1 inner-rv)) + (when (agent/attached?) + (is (vector? inner-allocs))) + 2))] + (is (= 2 outer-rv)) + (when (agent/attached?) + (is (vector? outer-allocs))))) + + (testing "Exception handling" + (is (thrown? Exception + (agent/with-allocation-tracing + (throw (Exception. 
"test exception"))))))) + +(deftest allocation-on-thread?-test + (testing "Thread filtering predicate" + (let [current-thread (jvm/current-thread-id) + pred (agent/allocation-on-thread?)] + (is (fn? pred) + "Should return a predicate function") + (is (pred {:thread current-thread}) + "Should match current thread") + (is (not (pred {:thread (inc current-thread)})) + "Should not match other threads"))) + + (testing "Explicit thread ID" + (let [test-thread 12345 + pred (agent/allocation-on-thread? test-thread)] + (is (pred {:thread test-thread}) + "Should match specified thread") + (is (not (pred {:thread 0})) + "Should not match other threads")))) + +(deftest allocation-freed?-test + (testing "Freed allocation detection" + (is (agent/allocation-freed? {:freed 1}) + "Should identify freed allocation") + (is (not (agent/allocation-freed? {:freed 0})) + "Should identify non-freed allocation"))) + +(deftest allocations-summary-test + (testing "Empty allocation summary" + (let [summary (agent/allocations-summary [])] + (is (= {:num-allocated 0 + :num-freed 0 + :allocated-bytes 0 + :freed-bytes 0} + summary) + "Should return zero summary for empty input"))) + + (testing "Allocation summary with records" + (let [records [{:object_size 100 :freed 1} + {:object_size 200 :freed 0} + {:object_size 300 :freed 1}] + summary (agent/allocations-summary records)] + (is (= {:num-allocated 3 + :num-freed 2 + :allocated-bytes 600 + :freed-bytes 400} + summary) + "Should correctly summarize allocation records")))) + +;; Integration Tests + +(deftest allocation-tracking-integration-test + (testing "End-to-end allocation tracking" + (when (agent/attached?) + (let [[allocs result] (agent/with-allocation-tracing + (make-test-allocation)) + current-thread (jvm/current-thread-id) + thread-allocs (filter (agent/allocation-on-thread?) allocs) + summary (agent/allocations-summary thread-allocs)] + (is (string? result) + "Should complete allocation operation") + (is (pos? (:num-allocated summary)) + "Should capture allocations") + (is (every? 
#(= current-thread (:thread %)) thread-allocs) + "Should correctly filter thread allocations"))))) + +;; Warmup for allocation tests +(dotimes [_ 100] + (agent/with-allocation-tracing 1)) diff --git a/bases/criterium/.clj-kondo/config.edn b/bases/criterium/.clj-kondo/config.edn new file mode 100644 index 0000000..e01b2bf --- /dev/null +++ b/bases/criterium/.clj-kondo/config.edn @@ -0,0 +1 @@ +{:config-paths ["../../../.clj-kondo"]} diff --git a/bases/criterium/deps.edn b/bases/criterium/deps.edn new file mode 100644 index 0000000..e94fbff --- /dev/null +++ b/bases/criterium/deps.edn @@ -0,0 +1,25 @@ +{:paths ["src"] + :deps {org.openjdk.jmh/jmh-core {:mvn/version "1.37"}} + :aliases {:test + {:extra-paths + ["test"] + :extra-deps + {local/agent {:local/root "../agent"} + org.clojure/clojure {:mvn/version "1.11.2"} + expound/expound {:mvn/version "0.9.0"} + org.clojure/test.check {:mvn/version "1.1.1"} + polylith-kaocha/kaocha-wrapper + {:git/url "https://github.com/imrekoszo/polylith-kaocha" + :git/tag "v0.8.4" + :git/sha "f096de8" + :deps/root "projects/kaocha-wrapper"}}} + :kaocha {:extra-deps + {lambdaisland/kaocha {:mvn/version "1.91.1392"} + lambdaisland/kaocha-cloverage {:mvn/version "1.1.89"} + local/agent {:local/root "../agent"}} + :exec-fn kaocha.runner/exec-fn + :exec-args {} + :jvm-opts ["-XX:-OmitStackTraceInFastThrow" + "-Dclojure.main.report=stderr" + "-agentpath:../../agent-cpp/libcriterium.dylib"] + :main-opts ["-m" "kaocha.runner"]}}} diff --git a/bases/criterium/mj.edn b/bases/criterium/mj.edn new file mode 100644 index 0000000..f767ed5 --- /dev/null +++ b/bases/criterium/mj.edn @@ -0,0 +1,5 @@ +#mj {:project #include "project.edn" + :targets + #merge + [#default-targets [:pom :clean :jar :install :deploy] + #include "../mj-targets.edn"]} diff --git a/bases/criterium/src/criterium/analyse.clj b/bases/criterium/src/criterium/analyse.clj new file mode 100644 index 0000000..554ad2d --- /dev/null +++ b/bases/criterium/src/criterium/analyse.clj @@ -0,0 +1,450 @@ +(ns criterium.analyse + (:require + [criterium.analyse.digest-samples] + [criterium.analyse.methods :as methods] + [criterium.analyse.metrics-samples] + [criterium.collect-plan :as collect-plan] + [criterium.metric :as metric] + [criterium.types :as types] + [criterium.util.debug :as debug] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.util.sampled-stats :as sampled-stats] + [criterium.util.stats :as stats])) + +(defn exp [v] + (Math/exp v)) + +(defn log [v] + (Math/log v)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn transform-log + "Performs logarithmic transformation on time-based samples. + + Returns a function that takes a sampled data map and adds + log-transformed samples under a new key. Only transforms samples + with :time dimension from quantitative metrics. 
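+  The inverse transform (exp) is recorded under the result's :transform key,
+  so later analyses and views can map derived values back to the original
+  time scale.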
+ + Parameters: + opts - Optional map with keys: + + :id - Key for transformed samples in result + (default: :log-samples) + :samples-id - Key for source samples in input (default: :samples) + :metric-ids - Set of metric ids to transform (default: all quantitative) + + The returned function: + - Takes a sampled data map containing samples and metric configs + - Returns the map with transformed samples added under :id key + - Preserves original samples and adds transform metadata + + Example: + (let [transform (transform-log {:id :my-logs}) + result (transform {:samples {...} :metrics-defs {...}})] + (:my-logs result)) ;; Contains log-transformed values" + ([] (transform-log {})) + ([{:keys [id samples-id metric-ids] :as options}] + (fn transform-log [data-map] + (let [samples-id (or samples-id :samples) + id (or id (keyword (str "log-" (name samples-id)))) + metrics-samples (have + types/generic-metrics-samples-map? + (data-map samples-id)) + metrics-defs (-> (:metrics-defs metrics-samples) + (metric/select-metrics metric-ids) + (metric/filter-metrics + (every-pred + (metric/type-pred :quantitative) + (metric/dimension-pred :time)))) + metric-configs (metric/all-metric-configs metrics-defs) + transformed (-> + (methods/transform + metrics-samples + metric-configs + log + exp + options) + (merge + {:source-id samples-id}))] + (assoc data-map id transformed))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn quantiles + "Calculates statistical quantiles for quantitative sample measurements. + + Returns a function that computes sample quantiles (including quartiles and + custom percentiles) for each quantitative metric in the input data. + + Parameters: + opts - Optional map with keys: + :id - Key for quantile results in output (default: :quantiles) + :samples-id - Key for source samples in input (default: :samples) + :metric-ids - Set of metric ids to analyze (default: all quantitative) + :quantiles - Vector of quantile values to calculate [0-1] + (default: [0.25 0.5 0.75]) + + The returned function: + - Takes a sampled data map containing samples and metric configs + - Returns the map with quantile analysis added under :id key + - Preserves source data transforms for correct value scaling + + Example: + (let [analyze (quantiles {:quantiles [0.05 0.95]}) + result (analyze {:samples {...} :metrics-defs {...}})] + (get-in result [:quantiles :elapsed-time])) + ;; Returns map of quantiles for elapsed time metric" + ([] (quantiles {})) + ([{:keys [id samples-id metric-ids] :as analysis}] + (fn quantiles [data-map] + {:pre [(have? types/result-map? data-map)]} + (let [samples-id (or samples-id :samples) + id (or id :quantiles) + metrics-samples (data-map samples-id) + metrics-defs (-> (:metrics-defs metrics-samples) + (metric/select-metrics metric-ids) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + quantiles (methods/quantiles + metrics-samples + metric-configs + analysis) + quantiles-map (have + types/quantiles-map? + (merge + {:type :criterium/quantiles + :source-id samples-id + :metrics-defs metrics-defs} + quantiles))] + (assoc data-map id quantiles-map))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn outliers + "Detects statistical outliers in sample measurements using boxplot criteria. + + Returns a function that identifies outliers based on the interquartile range + (IQR) method, classifying them as mild or severe deviations. 
+ + Parameters: + opts - Optional map with keys: + :id - Key for outlier results (default: :outliers) + :samples-id - Key for source samples (default: :samples) + :quantiles-id - Key for required quantile analysis (default: :quantiles) + :metric-ids - Set of metric ids to analyze (default: all quantitative) + + The returned function: + - Takes a sampled data map containing samples, metrics config and quantiles + - Returns the map with outlier analysis added under :id key + - For each metric provides: + - outlier thresholds (IQR boundaries) + - identified outliers with indices + - counts by severity (low/high, mild/severe) + - Requires prior quantile analysis in input data + + Outlier Classification: + - Mild: between 1.5 and 3.0 IQR from quartiles + - Severe: beyond 3.0 IQR from quartiles + + Example: + (let [analyze (outliers) + result (analyze {:samples {...} + :metrics-defs {...} + :quantiles {...}})] + (get-in result [:outliers :elapsed-time])) + ;; Returns {:thresholds [...] :outliers {...} :outlier-counts {...}}" + ([] (outliers {})) + ([{:keys [id samples-id quantiles-id metric-ids]}] + (fn [data-map] + (let [id (or id :outliers) + quantiles-id (or quantiles-id :quantiles) + samples-id (or samples-id :samples) + all-quantiles (have (data-map quantiles-id)) + metrics-samples (have (data-map samples-id)) + metrics-defs (-> (:metrics-defs all-quantiles) + (metric/select-metrics metric-ids)) + metric-configs (metric/all-metric-configs metrics-defs)] + (when-not all-quantiles + (throw (ex-info + "outlier analysis requires quantiles analysis" + {:quantiles-id quantiles-id + :available-ids (keys data-map)}))) + (let [outliers (methods/outliers + metrics-samples + all-quantiles + metric-configs + {}) + outliers-map (have + types/outliers-map? + (merge + {:type :criterium/outliers + :source-id samples-id + :quantiles-id quantiles-id + :metrics-defs metrics-defs} + outliers))] + (assoc data-map id outliers-map)))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn stats + "Calculates comprehensive descriptive statistics for sample measurements. + + Returns a function that computes key statistics including mean, variance, + standard deviation bounds (±3σ), and min/max values for quantitative metrics. 
+ + Parameters: + opts - Optional map with keys: + :id - Key for stats in output (default: :stats) + :samples-id - Key for source samples (default: :samples) + :outliers-id - Key for outlier analysis if available + :metric-ids - Set of metric ids to analyze (default: all quantitative) + + The returned function: + - Takes a sampled data map containing samples and metric configs + - Returns the map with statistics added under :id key + - For each metric, calculates: + - mean, variance + - mean ±3σ bounds + - min/max values + - Preserves data transforms for correct scaling + + Example: + (let [analyze (stats) + result (analyze {:samples {...} :metrics-defs {...}})] + (get-in result [:stats :elapsed-time])) + ;; Returns {:mean 100.0 :variance 16.0 ...}" + ([] (stats {})) + ([{:keys [id samples-id outliers-id metric-ids] + :as analysis}] + (let [samples-id (or samples-id :samples) + id (or id :stats) + outliers-id (or outliers-id :outliers)] + (fn [data-map] + (debug/dtap> {:stats id}) + (let [outliers (when outliers-id + (data-map outliers-id)) + metrics-samples (have (data-map samples-id)) + metrics-defs (-> (have (:metrics-defs metrics-samples)) + (metric/select-metrics metric-ids) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + stats (methods/stats + metrics-samples + outliers + metric-configs + analysis) + stats-map (have + types/stats-map? + (merge + {:type :criterium/stats + :metrics-defs metrics-defs + :source-id samples-id + :outliers-id outliers-id + :batch-size (:batch-size metrics-samples)} + stats))] + (assoc data-map id stats-map)))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn event-stats + "Calculates statistics for discrete events captured during sampling. + + Returns a function that aggregates event metrics like JIT compilation, + garbage collection, and class loading events that occur during benchmarking. + + Parameters: + opts - Optional map with keys: + :id - Key for event stats in output (default: :event-stats) + :samples-id - Key for source samples (default: :samples) + :metric-ids - Set of event metric ids to analyze (default: all events) + + The returned function: + - Takes a sampled data map containing samples and metrics configs + - Returns the map with event statistics added under :id key + - For each event metric collects: + - Total counts/durations + - Number of samples containing events + - Metric-specific aggregations (e.g., loaded/unloaded classes) + - Only processes metrics of type :event + + Example: + (let [analyze (event-stats) + result (analyze {:samples {...} :metrics-defs {...}})] + (:event-stats result)) + ;; Returns {:compilation {:time-ms 8 :sample-count 2} ...}" + ([] (event-stats {})) + ([{:keys [id samples-id metric-ids] :as analysis}] + (let [id (or id :event-stats) + samples-id (or samples-id :samples)] + (fn [data-map] + (debug/dtap> {:event-stats id}) + (let [metrics-samples (data-map samples-id) + metrics-defs (-> (:metrics-defs metrics-samples) + (metric/select-metrics metric-ids) + (metric/filter-metrics + (metric/type-pred :event))) + event-stats (methods/event-stats + metrics-samples + metrics-defs + analysis) + es-map (have + types/event-stats-map? 
+ (merge + {:type :criterium/event-stats + :source-id samples-id + :metrics-defs metrics-defs} + event-stats))] + (assoc data-map id es-map)))))) + +(defn- min-f + ^double [f ^double q ^double r] + (min ^double (f q) ^double (f r))) + +(defn outlier-significance* + "Calculate statistical significance of outliers using gaussian fit analysis. + + Determines how well a gaussian distribution describes the sample statistics by + comparing the sample variance to the variance of a fitted gaussian model. A + high significance indicates the outliers substantially affect the + distribution. + + Based on the methodology described in: + http://www.ellipticgroup.com/misc/article_supplement.pdf, p17 + + Parameters: + mean - Sample mean (must be non-zero) + variance - Sample variance + batch-size - Number of measurements per sample (must be >= 16) + + Returns: + A value between 0 and 1 representing outlier significance: + - 0: No significant effect from outliers + - 1: Outliers heavily influence the distribution + + Throws: + AssertionError if preconditions on inputs are not met + + Example: + (outlier-significance* 100.0 16.0 67108864) ;; => 0.25" + [^double mean ^double variance ^long batch-size] + {:pre [(number? mean) (number? variance) (nat-int? batch-size)]} + (if (or (zero? variance) (< batch-size 16)) + 0 + (let [variance-block (* batch-size variance) + std-dev-block (Math/sqrt variance-block) + mean-g-min (/ mean 2) + sigma-g (min (/ mean-g-min 4) + (/ std-dev-block (Math/sqrt batch-size))) + variance-g (* sigma-g sigma-g) + batch-size-sqr (util/sqr batch-size) + c-max-f (fn ^long [^double t-min] ; Eq 38 + (let [j0-sqr (util/sqr (- mean t-min)) + k0 (- (* batch-size-sqr j0-sqr)) + k1 (+ variance-block + (- (* batch-size variance-g)) + (* batch-size j0-sqr)) + det (- (* k1 k1) + (* 4 variance-g k0))] + (long (Math/floor (/ (* -2 k0) + (+ k1 (Math/sqrt det))))))) + var-out (fn ^double [^long c] ; Eq 45 + (let [nmc (- batch-size c)] + (* (/ nmc (double batch-size)) + (- variance-block (* nmc variance-g))))) + c-max (min-f c-max-f 0.0 mean-g-min)] + (/ (min-f var-out 1.0 c-max) variance-block)))) + +(defn outlier-effect + "Return a keyword describing the effect of outliers on a point estimate." + [^double significance] + (cond + (< significance 0.01) :unaffected + (< significance 0.1) :slight + (< significance 0.5) :moderate + :else :severe)) + +(defn- samples-outlier-significance [batch-size outliers stats metric-configs] + (reduce + (fn sample-m [result metric] + (let [path (:path metric) + outlier-data (get-in outliers path) + stat (get-in stats path) + _ (assert (map? outlier-data) outlier-data) + outlier-counts (:outlier-counts outlier-data) + significance (when (some pos? (vals outlier-counts)) + (outlier-significance* + (:mean stat) + (:variance stat) + batch-size))] + (update-in result path + assoc + :significance significance + :effect (outlier-effect significance)))) + {} + metric-configs)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn outlier-significance + "Analyzes the statistical significance of detected outliers. + + Returns a function that calculates how much outliers affect the sample + distribution by comparing actual variance to an idealized gaussian model. 
+ + Parameters: + opts - Optional map with keys: + :id - Key for significance results (default: :outlier-significance) + :outliers-id - Key for outlier analysis (default: :outliers) + :stats-id - Key for statistical analysis (default: :stats) + :metric-ids - Set of metric ids to analyze (default: all quantitative) + + The returned function: + - Takes a sampled data map containing outlier analysis and statistics + - Returns the map with significance analysis added under :id key + - For each metric provides: + - significance: A value between 0-1 indicating outlier impact + - effect: Keyword describing impact (:unaffected, :slight, :moderate, :severe) + - Requires prior outlier and statistical analysis in input data + + Effects are classified as: + - :unaffected - significance < 0.01 + - :slight - significance < 0.1 + - :moderate - significance < 0.5 + - :severe - significance >= 0.5 + + Example: + (let [analyze (outlier-significance) + result (analyze {:outliers {...} + :stats {...}})] + (get-in result [:outlier-significance :elapsed-time])) + ;; Returns {:significance 0.25 :effect :moderate}" + ([] (outlier-significance {})) + ([{:keys [id outliers-id stats-id metric-ids] :as _analysis}] + (fn [data-map] + (let [id (or id :outlier-significance) + outliers-id (or outliers-id :outliers) + stats-id (or stats-id :stats) + outliers (data-map outliers-id) + stats (data-map stats-id) + metrics-defs (-> (:metrics-defs stats) + (metric/select-metrics metric-ids) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs)] + (when-not outliers + (throw (ex-info + "outlier significance requires outlier analysis" + {:outliers-id outliers-id + :available-ids (keys data-map)}))) + (let [significance (samples-outlier-significance + (:batch-size stats) + (util/outliers outliers) + (util/stats stats) + metric-configs) + os-map (have + types/outlier-significance-map? + {:type :criterium/outlier-significance + :transform collect-plan/identity-transforms + :outlier-significance significance + :metrics-defs metrics-defs + :outliers-id outliers-id + :source-id stats-id})] + (assoc data-map id os-map)))))) diff --git a/bases/criterium/src/criterium/analyse/digest_samples.clj b/bases/criterium/src/criterium/analyse/digest_samples.clj new file mode 100644 index 0000000..3e6a29f --- /dev/null +++ b/bases/criterium/src/criterium/analyse/digest_samples.clj @@ -0,0 +1,140 @@ +(ns criterium.analyse.digest-samples + (:require + [criterium.analyse.methods :as methods] + [criterium.collect-plan :as collect-plan] + [criterium.types :as types] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.util.stats :as stats] + [criterium.util.t-digest :as t-digest])) + +(defmethod methods/transform :criterium/digest + [digest-samples metric-configs f inv-f options] + {:have [(have? types/digest-samples-map? digest-samples)]} + (let [metric->digest (util/metric->digest digest-samples) + metric->digest' (reduce + (fn x-path [result path] + (assoc + result + path + (t-digest/transform + (metric->digest path) + f))) + {} + (mapv :path metric-configs))] + (-> + (select-keys + digest-samples + types/digest-samples-keys) + (merge + {:metric->digest metric->digest' + :transform {:sample-> inv-f :->sample f}})))) + +(defmethod methods/quantiles :criterium/digest + [digest-samples metric-configs options] + {:have [(have? types/digest-samples-map? 
digest-samples)]} + (let [metric->digest (util/metric->digest digest-samples) + quantiles (into [0.25 0.5 0.75] (:quantiles options)) + quantiles (reduce + (fn qs [result path] + (let [digest (metric->digest path)] + (assoc-in + result path + (zipmap + quantiles + (mapv + (partial t-digest/quantile digest) + quantiles))))) + {} + (mapv :path metric-configs))] + {:type :criterium/quantiles + :quantiles quantiles + :transform collect-plan/identity-transforms})) + +(defn outlier-count + [low-severe low-mild high-mild high-severe] + {:low-severe low-severe + :low-mild low-mild + :high-mild high-mild + :high-severe high-severe}) + +(defn classifier + [[^double low-severe ^double low-mild ^double high-mild ^double high-severe]] + (fn [^double x i] + (when-not (<= low-mild x high-mild) + [i (cond + (<= x low-severe) :low-severe + (< low-severe x low-mild) :low-mild + (> high-severe x high-mild) :high-mild + (>= x high-severe) :high-severe)]))) + +(defn digest-outliers + [digest quantiles] + (let [thresholds (stats/boxplot-outlier-thresholds + (get quantiles 0.25) + (get quantiles 0.75)) + classifier (classifier thresholds) + outliers (when (apply not= thresholds) + (into {} + (mapv classifier + (t-digest/centroid-means digest) + (range)))) + outlier-counts (reduce-kv + (fn [counts _i v] + (update counts v inc)) + (outlier-count 0 0 0 0) + outliers)] + {:thresholds thresholds + :outliers outliers + :outlier-counts outlier-counts})) + +(defmethod methods/outliers :criterium/digest + [digest-samples all-quantiles metric-configs _options] + {:have [(have? types/digest-samples-map? digest-samples)]} + (let [metric->digest (util/metric->digest digest-samples) + quantiles (util/quantiles all-quantiles) + outliers (reduce + (fn qs [result path] + (let [digest (metric->digest path) + quantiles (get-in quantiles path)] + (assoc-in + result path + (digest-outliers digest quantiles) ))) + {} + (mapv :path metric-configs))] + {:type :criterium/outliers + :outliers outliers + :num-samples (t-digest/sample-count (first (vals metric->digest))) + :transform collect-plan/identity-transforms})) + +(defn- digest-sample-states + [digest outliers] + (let [mean (t-digest/mean digest) + variance (t-digest/variance digest mean) + sigma (Math/sqrt variance) + three-sigma (* 3.0 sigma)] + {:n (t-digest/sample-count digest) + :mean mean + :variance variance + :sigma sigma + :mean-plus-3sigma (+ mean three-sigma) + :mean-minus-3sigma (- mean three-sigma) + :min-val (t-digest/minimum digest) + :max-val (t-digest/maximum digest)})) + +(defmethod methods/stats :criterium/digest + [digest-samples outliers metric-configs options] + {:have [(have? types/digest-samples-map? digest-samples)]} + (let [metric->digest (util/metric->digest digest-samples) + outliers (when outliers (util/outliers outliers)) + stats (reduce + (fn qs [result path] + (let [digest (have (metric->digest path))] + (assoc-in + result path + (digest-sample-states digest outliers) ))) + {} + (mapv :path metric-configs))] + {:type :criterium/stats + :stats stats + :transform collect-plan/identity-transforms})) diff --git a/bases/criterium/src/criterium/analyse/methods.clj b/bases/criterium/src/criterium/analyse/methods.clj new file mode 100644 index 0000000..8634641 --- /dev/null +++ b/bases/criterium/src/criterium/analyse/methods.clj @@ -0,0 +1,26 @@ +(ns criterium.analyse.methods) + +(defmulti transform + "Transform sample values." + (fn [sample-map metric-configs f inv-f options] + (:type sample-map))) + +(defmulti quantiles + "Calculate quantiles." 
+ (fn [sample-map metric-configs options] + (:type sample-map))) + +(defmulti outliers + "Calculate outliers." + (fn [sample-map quantiles metric-configs options] + (:type sample-map))) + +(defmulti stats + "Calculate sample statistics." + (fn [sample-map outliers metric-configs options] + (:type sample-map))) + +(defmulti event-stats + "Calculate sample statistics for events." + (fn [sample-map metric-configs options] + (:type sample-map))) diff --git a/bases/criterium/src/criterium/analyse/metrics_samples.clj b/bases/criterium/src/criterium/analyse/metrics_samples.clj new file mode 100644 index 0000000..7051d48 --- /dev/null +++ b/bases/criterium/src/criterium/analyse/metrics_samples.clj @@ -0,0 +1,120 @@ +(ns criterium.analyse.metrics-samples + (:require + [criterium.analyse.methods :as methods] + [criterium.collect-plan :as collect-plan] + [criterium.types :as types] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.util.sampled-stats :as sampled-stats] + [criterium.util.stats :as stats])) + +(defmethod methods/transform :criterium/metrics-samples + [metrics-samples metric-configs f inv-f options] + (let [metric->values (util/metric->values metrics-samples) + metric->values' (reduce + (fn x-path [result path] + (assoc + result + path + (mapv f (metric->values path)))) + {} + (mapv :path metric-configs))] + (-> + (select-keys + metrics-samples + types/metrics-samples-keys) + (merge + {:metric->values metric->values' + :transform {:sample-> inv-f :->sample f}})))) + +(defmethod methods/quantiles :criterium/metrics-samples + [metrics-samples metric-configs options] + (let [quantiles (sampled-stats/quantiles + (util/metric->values metrics-samples) + metric-configs + options)] + {:type :criterium/quantiles + :quantiles quantiles + :transform collect-plan/identity-transforms})) + +(defn outlier-count + [low-severe low-mild high-mild high-severe] + {:low-severe low-severe + :low-mild low-mild + :high-mild high-mild + :high-severe high-severe}) + +(defn classifier + [[^double low-severe ^double low-mild ^double high-mild ^double high-severe]] + (fn [^double x i] + (when-not (<= low-mild x high-mild) + [i (cond + (<= x low-severe) :low-severe + (< low-severe x low-mild) :low-mild + (> high-severe x high-mild) :high-mild + (>= x high-severe) :high-severe)]))) + +(defn samples-outliers [metric-configs all-quantiles samples] + (reduce + (fn sample-m [result metric-config] + (let [path (:path metric-config) + quantiles (have map? (get-in all-quantiles path) + {:all-quantiles all-quantiles}) + thresholds (stats/boxplot-outlier-thresholds + (get quantiles 0.25) + (get quantiles 0.75)) + classifier (classifier thresholds) + outliers (when (apply not= thresholds) + (into {} + (mapv classifier + (get samples path) + (range)))) + outlier-counts (reduce-kv + (fn [counts _i v] + (update counts v inc)) + (outlier-count 0 0 0 0) + outliers)] + (update-in result path + assoc + :thresholds thresholds + :outliers outliers + :outlier-counts outlier-counts))) + {} + metric-configs)) + +(defmethod methods/outliers :criterium/metrics-samples + [metrics-samples all-quantiles metric-configs _options] + {:have [(have? types/digest-samples-map? 
metrics-samples)]} + (let [outliers (samples-outliers + metric-configs + (util/quantiles all-quantiles) + (util/metric->values metrics-samples))] + {:type :criterium/outliers + :outliers outliers + :num-samples (:num-samples metrics-samples) + :transform collect-plan/identity-transforms})) + +(defmethod methods/stats :criterium/metrics-samples + [metrics-samples outliers metric-configs options] + {:have [(have? types/digest-samples-map? metrics-samples)]} + (let [metric->values (util/metric->values metrics-samples) + stats (sampled-stats/sample-stats + metric->values + (when outliers (util/outliers outliers)) + metric-configs + options)] + {:type :criterium/stats + :stats stats + :transform collect-plan/identity-transforms})) + +(defmethod methods/event-stats :criterium/metrics-samples + [metrics-samples metrics-defs _options] + {:have [(have? types/digest-samples-map? metrics-samples)]} + (let [metric->values (util/metric->values metrics-samples) + event-stats (sampled-stats/event-stats + metrics-defs + metric->values)] + {:type :criterium/event-stats + :event-stats event-stats + :batch-size (:batch-size metrics-samples) + :transform collect-plan/identity-transforms})) diff --git a/bases/criterium/src/criterium/bench.clj b/bases/criterium/src/criterium/bench.clj new file mode 100644 index 0000000..c42d5dc --- /dev/null +++ b/bases/criterium/src/criterium/bench.clj @@ -0,0 +1,210 @@ +(ns criterium.bench + "Perform sound benchmarking of Clojure code. + + Provides functions and macros for measuring code performance while + accounting for: + + - JVM warmup periods + - Garbage collection effects + - Statistical significance + + Primary API: + - bench - Macro for benchmarking expressions + - bench-measured - Function for benchmarking pre-wrapped measurements + - last-bench - Access results from most recent benchmark + + Example: + (bench (+ 1 1)) ; Basic usage + (bench (+ 1 1) :viewer :pprint) ; With pretty-printed output" + (:require + [criterium.bench.config :as bench-config] + [criterium.bench.impl :as impl] + [criterium.benchmark :as benchmark] + [criterium.collect-plan :as collect-plan] + [criterium.collector :as collector] + [criterium.measured :as measured] + [criterium.util.output :as output])) + +(defn last-bench + "Returns the complete measurement data from the most recent benchmark. + + The returned data structure contains all metrics, statistical analysis, and + raw measurements from the last call to bench or bench-measured. + + Returns nil if no benchmarks have been run in the current session. + + Example: + (bench (+ 1 1)) + (let [results (last-bench)] + ;; Access detailed metrics from results + )" + [] + (impl/last-bench)) + +(defn collect-data-map + "Collect metrics according to the sampling plan. + + Parameters: + collector-config - Configuration for the metric collector + collect-plan - Strategy for collecting samples + measured - The wrapped code/function to measure + + Returns collected measurements." + [collector-config collect-plan measured] + (let [collector (collector/collector collector-config)] + (collect-plan/collect collect-plan collector measured))) + +(defn analyze + "Apply statistical analysis to collected metrics. + + Parameters: + analyse-config - Vector of analysis steps to perform + metrics - Raw metrics from collect-metrics + + Returns analyzed metrics with statistical computations added." + [analyse-plan data-map] + (let [analyze (benchmark/->analyse analyse-plan)] + (analyze data-map))) + +(defn view + "Format and present analyzed metrics. 
+ + Parameters: + view-config - Vector of view components to include + metrics - Analyzed metrics from analyze-metrics + options - Additional view options like :viewer + + Returns the viewed metrics data structure." + [view-plan viewer data-map] + (let [view (benchmark/->view view-plan)] + (view viewer data-map))) + +(defn- return-value + "Extract the returned value for the sampled." + [config bench-map] + (get-in bench-map (:return-value config))) + +(defn bench-measured + "Evaluate measured and output the benchmark time. + + By default, return the value of calling the measured's wrapped + function. + + The timing info is available as a data structure by calling last-time. + + Takes a bench-plan that fully specifies the benchmark behaviour." + [bench-plan measured] + (output/with-progress-reporting (:verbose bench-plan) + (let [data-map (->> (collect-data-map + (:collector-config bench-plan) + (:collect-plan bench-plan) measured) + (analyze (:analyse bench-plan)))] + (view (:view bench-plan) (:viewer bench-plan) data-map) + (impl/last-bench! {:bench-plan bench-plan :data data-map}) + (return-value bench-plan data-map)))) + +#_(defn bench-measured + "Evaluate and benchmark a pre-wrapped measurement. + + The metrics and output are controlled via parameters. + + Parameters: + measured - A wrapped function/expression prepared for measurement + options - Map of configuration options: + :viewer - Output format [:pprint, :portal, or nil(default)] + :analyse - Vector of analysis steps [[:outliers] [:stats]] + :view - Vector of view components [:stats] + :metric-ids - Vector of metrics to collect, from: + [:elapsed-time :garbage-collector :finalization + :memory :thread-allocation :compilation + :measured-args :class-loader] + :limit-time-s - Time limit in seconds (optional) + :collect-plan - Sampling strategy (optional) + + Return: + The value from evaluating the measured expression. + The complete benchmark data is available via (last-bench). + + Examples: + ;; Basic usage with a measured expression + (bench-measured my-measured {}) + + ;; With pretty-printed output and specific metrics + (bench-measured my-measured + {:viewer :pprint + :metric-ids [:elapsed-time :memory]}) + + Notes: + - Ensures statistical significance through multiple samples + - Accounts for JVM warmup + - Handles GC interference" + [measured options] + (bench-measured* (bench-config/config-map options) measured)) + +(defn options->bench-plan + "Explicit conversion of `bench` options into a bench-plan." + [& {:as options}] + (bench-config/config-map options)) + + +(defmacro bench + "Main macro for benchmarking Clojure expressions with statistical rigor. + + Intended for simplified use at the REPL. + + Takes an expression to benchmark, and optional configuration options. + + The expression must be free of local references. + + Parameters: + expr - Expression to benchmark + options - Keyword/value pairs for configuration: + :viewer - Output format [:pprint, :portal, or nil(default)] + :analyse - Vector of analysis steps [[:outliers] [:stats]] + :view - Vector of view components [:stats] + :metric-ids - Vector of metrics to collect, from: + [:elapsed-time :garbage-collector :finalization + :memory :thread-allocation :compilation + :measured-args :class-loader] + :limit-time-s - Time limit in seconds (optional) + :collect-plan - Sampling strategy (optional) + :time-fn - Custom timing function (optional) + + Returns: + The value from evaluating the expression. + Complete benchmark data available via (last-bench). 
+ + Examples: + ;; Basic usage + (bench (+ 1 1)) + + ;; With pretty-printed output + (bench (+ 1 1) :viewer :pprint) + + ;; With specific metrics and time limit + (bench (my-function) + :metric-ids [:elapsed-time :memory] + :limit-time-s 5) + + (bench (+ 1 1) + :viewer :portal + :benchmark (criterium.benchmark/->benchmark + {:analyse [[:quantiles {:quantiles [0.025 0.5 0.975]}] + :outliers + :stats] + :view [:stats :quantiles :samples :histogram]})) + + For the portal viewer, you will need to have portal connected to tap>. + + Notes: + - Handles JVM warmup automatically + - Accounts for GC interference + - Ensures statistical significance + - Expression cannot refer to local bindings" + [expr & options] + (let [options-map (apply hash-map options) + expr-options (select-keys options-map [:time-fn]) + options (dissoc options-map :time-fn)] + `(bench-measured + (options->bench-plan ~options) + (measured/expr ~expr ~expr-options)))) diff --git a/bases/criterium/src/criterium/bench/config.clj b/bases/criterium/src/criterium/bench/config.clj new file mode 100644 index 0000000..e0f211e --- /dev/null +++ b/bases/criterium/src/criterium/bench/config.clj @@ -0,0 +1,105 @@ +(ns criterium.bench.config + (:require + [clojure.set :as set] + [criterium.bench-plans :as bench-plans] + [criterium.collect-plan.config :as collect-plan-config] + [criterium.collector :as collector] + [criterium.collector-configs :as collector-configs] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have]] + [criterium.util.units :as units] + [criterium.viewer.portal] + [criterium.viewer.pprint] + [criterium.viewer.print])) + +(defn metric-ids->collector-config + [metric-ids] + (let [metrics (zipmap + metric-ids + (mapv collector/maybe-var-get-stage metric-ids)) + terminator (util/filter-map collector/terminal? metrics) + stages (util/filter-map (complement collector/terminal?) metrics)] + (when (> (count terminator) 1) + (throw (ex-info + "More than one terminal function specified in metric-ids" + {:terminators (keys terminator)}))) + (when-let [unknown (not-empty (util/filter-map nil? metrics))] + (throw (ex-info + "Unknown metric-ids" + {:metric-ids (keys unknown)}))) + {:stages (filterv stages metric-ids) + :terminator (or (some-> terminator + first + key) + :elapsed-time)})) + +(defn config-map + "Convert option arguments into a criterium configuration map. + The config map specifies how criterium will execute." + [options-map] + (let [unknown-keys (set/difference + (set (keys options-map)) + #{:limit-time-s + :metric-ids + :return-value + :collect-plan + :analyse + :view + :bench-plan + :verbose + :viewer}) + limit-time-s (:limit-time-s options-map) + analyse (:analyse options-map) + view (:view options-map) + bench-plan (:bench-plan options-map) + options-map (cond-> options-map + (:limit-time-s options-map) + (assoc :limit-time-ns + (* (long limit-time-s) + (long units/SEC-NS)))) + collect-plan (or (:collect-plan options-map) + (:collect-plan bench-plan) + :with-jit-warmup) + collect-plan (if (keyword? 
collect-plan) + (collect-plan-config/collect-plan-config + collect-plan + options-map) + (collect-plan-config/collect-plan-config + (:scheme-type collect-plan) + options-map)) + scheme-type (have (:scheme-type collect-plan)) + collector-config (->> + (or (when-let [metric-ids (:metric-ids options-map)] + (metric-ids->collector-config metric-ids)) + (:collector-config bench-plan) + collector-configs/default-collector-config) + (collect-plan-config/ensure-pipeline-stages + scheme-type))] + + (when (seq unknown-keys) + (throw (ex-info "Unknown options" {:options unknown-keys}))) + (cond-> (assoc (select-keys + options-map + [:return-value :verbose]) + :collect-plan collect-plan + :collector-config collector-config + :viewer (:viewer options-map :print) + :return-value (:return-value + options-map + [:samples :expr-value])) + + (= scheme-type :with-jit-warmup) + (assoc :analyse (or analyse + (:analyse bench-plan) + (:analyse bench-plans/default-with-warmup)) + :view (or view + (:view bench-plan) + (:view bench-plans/default-with-warmup))) + + (= scheme-type :one-shot) + (assoc :analyse (or analyse + (:analyse bench-plan) + (:analyse bench-plans/default-one-shot)) + :view (or view + (:view bench-plan) + (:view bench-plans/default-one-shot)))))) diff --git a/bases/criterium/src/criterium/bench/impl.clj b/bases/criterium/src/criterium/bench/impl.clj new file mode 100644 index 0000000..5c3a3bb --- /dev/null +++ b/bases/criterium/src/criterium/bench/impl.clj @@ -0,0 +1,16 @@ +(ns criterium.bench.impl + "Internal implementation details for criterium.bench namespace. + Not intended for direct use by consumers of the library.") + +(def ^:private last-bench* (volatile! nil)) + +(defn last-bench! + "Store the results of the last benchmark execution." + [results] + (vreset! last-bench* results)) + +(defn last-bench + "Retrieve the results of the last benchmark execution. + Internal use only." 
+ [] + @last-bench*) diff --git a/bases/criterium/src/criterium/bench_plans.clj b/bases/criterium/src/criterium/bench_plans.clj new file mode 100644 index 0000000..adbaa1a --- /dev/null +++ b/bases/criterium/src/criterium/bench_plans.clj @@ -0,0 +1,52 @@ +(ns criterium.bench-plans + "Provide pre-configured benchmark definitions.") + +(def default-collector-config + {:stages [] + :terminator :elapsed-time}) + +(def default-one-shot + {:collector-config default-collector-config + :analyse [:event-stats] + :view [:metrics + :event-stats + :collect-plan] + :viewer :print}) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def default-with-warmup + {:collector-config default-collector-config + :analyse [:transform-log + [:quantiles {:quantiles [0.9 0.99 0.99]}] + :outliers + [:stats {}] + [:stats {:samples-id :log-samples :id :log-stats}] + :event-stats] + :view [[:stats {:metric-ids [:memory]}] + [:stats {:stats-id :log-stats}] + :event-stats + :collect-plan + #_[:final-gc-warnings + {:warn-threshold 0.01}]] + :viewer :print}) + + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def log-histogram + {:collector-config default-collector-config + :analyse [:transform-log + [:quantiles {:quantiles [0.9 0.99 0.99]}] + :outliers + [:stats {}] + [:stats {:samples-id :log-samples :id :log-stats}] + :event-stats] + :view [[:stats {:metric-ids [:memory]}] + [:stats {:stats-id :log-stats}] + :quantiles + :event-stats + :outlier-counts + :collect-plan + :histogram + :sample-percentiles + :samples] + :viewer :print}) diff --git a/bases/criterium/src/criterium/benchmark.clj b/bases/criterium/src/criterium/benchmark.clj new file mode 100644 index 0000000..a4a8386 --- /dev/null +++ b/bases/criterium/src/criterium/benchmark.clj @@ -0,0 +1,122 @@ +(ns criterium.benchmark + "Namespace for composing and executing benchmarks from declarative specifications. + Provides functionality to construct benchmark functions from analysis and view + configurations." + (:require + [criterium.types :as types] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.view :as view])) + +(defn- resolve-analyse-fn + "Resolves a single analysis function specification. + If x is a sequence, treats first element as function and rest as args. + Otherwise treats x as a function name to resolve. + Returns a function of one argument (the sampled data)." + [x] + (let [options {:default-ns 'criterium.analyse}] + (if (sequential? x) + (apply (util/maybe-var-get (first x) options) (rest x)) + ((util/maybe-var-get x options))))) + +(defn- resolve-view-fn + "Resolves a single view function specification. + If x is a sequence, treats first element as function and rest as args. + Otherwise treats x as a function name to resolve. + Returns a function of one argument (the analysis result)." + [x] + (let [options {:default-ns 'criterium.view}] + (have + fn? + (if (sequential? x) + (apply (util/maybe-var-get (first x) options) (rest x)) + ((util/maybe-var-get x options))) + {:x x}))) + +(defn- resolve-analyse-fns + "Resolves all analysis functions from a specification vector." + [spec] + (mapv resolve-analyse-fn spec)) + +(defn- resolve-view-fns + "Resolves all view functions from a specification vector." + [spec] + (mapv resolve-view-fn spec)) + +(defn ->analyse + "Creates a composite analysis function from a sequence of analysis specs. + + Each spec is either a keyword/symbol to resolve a function, or a vector + with a keyword/symbol first element followed by arguments. 
+ + Analysis functions: + - Are composed in sequence from last to first + - Each takes a sampled map as input + - Each returns a modified sampled map + + Example specs: [:stats + [:quantiles {:quantiles [0.025 0.975]}]] + + Returns a function that takes sampled data and returns analysis results." + [analyse-plan] + (when-not (or (nil? analyse-plan) (sequential? analyse-plan)) + (throw + (ex-info "analyse must be a sequence of specs" {:analyse analyse-plan}))) + (let [fns (resolve-analyse-fns analyse-plan)] + (reduce comp (reverse fns)))) + +(defn ->view + "Creates a composite view function from a sequence of view specs. + + Each spec is either a keyword/symbol to resolve a function, or a vector + with a keyword/symbol first element followed by arguments. + + View functions: + - Are called in order with the analysis result + - Are expected to produce side effects (printing, plotting etc.) + - Return values are ignored + + Example specs: [:text-table] + + Returns a function that takes analysis results and handles viewing." + [view-plan] + (when-not (or (nil? view-plan) (sequential? view-plan)) + (throw + (ex-info "view must be a sequence of specs" {:view view-plan}))) + (let [fns (resolve-view-fns view-plan)] + (fn [viewer result] + {:pre [(have? keyword? viewer) + (have? types/result-map? result)]} + (run! #(% viewer result) fns) + (view/flush-viewer viewer) + result))) + +(defn ->benchmark + "Compose a benchmark based on a declarative map. + + The :analyse and :view keywords take a vector of function specs. A + function spec is either a keyword/symbol to resolve a function, or a + vector with a keyword/symbol first element followed by arguments. + + Analysis functions: + - Are composed in sequence from last to first + - Each takes a sampled map as input + - Each returns a modified sampled map + + View functions: + - Are called in order with the final analysis result + - Are expected to produce side effects (printing, plotting etc.) + - Return values are ignored + + Example spec: + {:analyse [:stats [:quantiles {:quantiles [0.025 0.975]}]] + :view [:text-table]} + + Returns a function that takes a sampled map and returns analysis results." + [bench-plan] + (let [analyse-fn (->analyse (:analyse bench-plan)) + view-fn (->view (:view bench-plan))] + (fn [sampled] + (-> sampled + analyse-fn + view-fn)))) diff --git a/bases/criterium/src/criterium/collect.clj b/bases/criterium/src/criterium/collect.clj new file mode 100644 index 0000000..429c41b --- /dev/null +++ b/bases/criterium/src/criterium/collect.clj @@ -0,0 +1,237 @@ +(ns criterium.collect + "Collect samples using a metrics collector." 
+ (:require + [criterium.collector :as collector] + [criterium.jvm :as jvm] + [criterium.measured :as measured] + [criterium.metric :as metric] + [criterium.types :as types] + [criterium.util.invariant :refer [have?]])) + +;;; Transform of samples + +(defn- sample-arrays->sample-maps + [sample-arrays collector] + (mapv (partial collector/transform collector) sample-arrays)) + +(defn sample-maps->map-of-samples + [samples metrics-defs] + (reduce + (fn [res {:keys [path]}] + (assoc res path (mapv #(get-in % path) samples))) + {} + (metric/all-metric-configs metrics-defs))) + +(defn transform + [collection-map] + (let [collector (:collector collection-map) + metrics-defs (:metrics-defs collector)] + (-> (:collections collection-map) + (sample-arrays->sample-maps collector) + (sample-maps->map-of-samples metrics-defs)))) + +;;; Memory management +(def ^:private force-gc-measured + (measured/expr (jvm/run-finalization-and-force-gc!))) + +(defn force-gc-no-capture! + "Force garbage collection and finalisers so that execution time + associated with this is not incurred at another time. Up to + max-attempts are run to clear all pending finalizers and free as much + memory as possible." + [^long num-gcs] + (dotimes [_ num-gcs] + (jvm/run-finalization-and-force-gc!))) + +(def ^:private force-gc-collector + (collector/collector + {:stages (mapv + collector/maybe-var-get-stage + [:garbage-collector :finalization]) + :terminator (collector/maybe-var-get-stage :elapsed-time)})) + +(defn force-gc! + "Force garbage collection and finalisers so that execution time + associated with this is not incurred at another time. Up to + max-attempts are run to clear all pending finalizers and free as much + memory as possible. + + Returns samples with GC execution time, total changes in memory, and + in object finalizers pending. + + Must be zero garbage sampling. Execution time is not critical." + [^long num-gcs] + {:post [(have? types/collection-map? %)]} + (let [args (measured/args force-gc-measured) + collector force-gc-collector + ti (unchecked-dec ^long (:length collector)) + collections (make-array Object num-gcs) + max-attempts (unchecked-dec num-gcs) + [num-attempts elapsed-time] + (loop [attempt 0 + elapsed-time 0] + (let [sample (collector/collect-array + collector + force-gc-measured + args + 1) + ^long t (.nth + ^clojure.lang.PersistentVector + (aget ^objects sample ti) + 0)] + (aset ^objects collections attempt sample) + (if (< attempt max-attempts) + (recur (inc attempt) (unchecked-add elapsed-time t)) + [attempt elapsed-time])))] + {:eval-count num-attempts + :elapsed-time elapsed-time + :collections collections + :num-samples num-attempts + :batch-size 1 + :collector collector})) + +;;; Batch Size + +(defn batch-size + "Return batch-size for the given time estimate and batch execution-time." + ^long [^long t0 ^long batch-time-ns] + (max 1 (long (/ batch-time-ns t0)))) + +;;; Timing + +(def ^:private throw-away-collector + (collector/collector + {:stages [] + :terminator :elapsed-time})) + +(defn throw-away-collection + "The initial measured evaluation is always un-representative. + This function throws it away, returning nil." + [measured] + (collector/collect-array + throw-away-collector + measured + (measured/args measured) + 1) + nil) + +(defn collect-arrays + "Take num-samples samples of measured using batch-size. + + The collector is used to collect each sample. + + This is memory allocation garbage free collection. + + Return a data map with the collected metric arrays on the :samples key. 
+ This will need to be transformed to get the metrics data." + [collector + measured + batch-size-obj + num-samples] + {:post [(have? types/collection-map? %)]} + (let [num-samples (max 2 ^long num-samples) + collections (make-array Object num-samples) + ti (unchecked-dec ^long (:length collector)) + batch-size (long batch-size-obj)] + (loop [eval-count 0 + elapsed-time 0 + i 0] + ;; Try and get the scheduler to take the thread when we are not + ;; in the middle of a sample + (Thread/yield) + (let [args (measured/args measured) + sample (collector/collect-array + collector measured args batch-size-obj) + ^long t (.nth + ^clojure.lang.PersistentVector + (aget ^objects sample ti) + 0) + elapsed-time (unchecked-add elapsed-time t) + eval-count (unchecked-add eval-count batch-size)] + (aset ^objects collections i sample) + (if (< i (dec num-samples)) + (recur eval-count + elapsed-time + (unchecked-inc i)) + {:eval-count eval-count + :elapsed-time elapsed-time + :collections collections + :num-samples (count collections) + :batch-size batch-size + :collector collector}))))) + +(def ^:private elapsed-time-collector + (collector/collector + {:stages [] + :terminator :elapsed-time})) + +(defn elapsed-time-point-estimate + "Run measured for an initial estimate of the elapsed-time. + + Returns an estimated execution elapsed-time in ns." + ^long [measured] + (let [args (measured/args measured) + s0 (collector/collect elapsed-time-collector measured args 1)] + (metric/elapsed-time s0))) + +(defn elapsed-time-min-estimate + "Return an estimate for the execution elapsed-time of a measured. + + Repeatedly times the invocation of the function and returns the + minimum invocation time. + + For quick functions limit execution count, while for slower functions + limit total execution time. Limit evaluations to eval-budget, or + elapsed time to time-budget-ns." + [measured num-samples ^long batch-size] + {:post [(have? types/collection-map? %)]} + (let [collected (collect-arrays + elapsed-time-collector + measured + batch-size + num-samples) + collections (:collections collected) + num-samples (long num-samples) + min-t (loop [i 0 min-t Long/MAX_VALUE] + (if (>= i num-samples) + min-t + (let [sample (aget ^objects collections i) + vs (aget ^objects sample 0) + t (long (.nth + ^clojure.lang.PersistentVector vs + 0))] + (recur + (unchecked-inc i) + (if (< min-t t) min-t t))))) + min-t (max 1 (long (/ (long min-t) batch-size))) + sum-t (:elapsed-time collected)] + (assoc collected + :t min-t + :total-time sum-t))) + +(defn warmup + "Run measured for the given number of collections to enable JIT compilation. + Return a sampled map." + [collector measured ^long num-samples ^long batch-size] + {:post [(have? types/collection-map? %)]} + (loop [i num-samples + elapsed-time 0 + min-time Long/MAX_VALUE + collections []] + (let [args (measured/args measured) + collected (collector/collect collector measured args batch-size) + t (metric/elapsed-time collected) + elapsed-time (unchecked-add elapsed-time t)] + (if (pos? 
i) + (recur + (unchecked-dec i) + elapsed-time + (min min-time t) + (conj collections collected)) + {:eval-count (* num-samples batch-size) + :elapsed-time elapsed-time + :min-time min-time + :collections (conj collections collected) + :num-samples (count collections) + :batch-size batch-size + :collector collector})))) diff --git a/bases/criterium/src/criterium/collect_plan.clj b/bases/criterium/src/criterium/collect_plan.clj new file mode 100644 index 0000000..94c5be9 --- /dev/null +++ b/bases/criterium/src/criterium/collect_plan.clj @@ -0,0 +1,167 @@ +(ns criterium.collect-plan + "Collection plan to control the collection of metrics from a measured." + (:require + [criterium.collect :as collect] + [criterium.collect-plan.impl :as impl] + [criterium.collector :as collector] + [criterium.measured :as measured] + [criterium.metric :as metric] + [criterium.types :as types] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]])) + +(defn required-stages + "Metrics collection stages required for the given collection-scheme" + [collect-plan-id] + {:pre [(have? keyword? collect-plan-id)]} + (impl/required-stages* collect-plan-id)) + +(defmethod impl/required-stages* :one-shot + [_collect-plan] + []) + +(defmethod impl/required-stages* :with-jit-warmup + [_collect-plan] + [:measured-args + :compilation + :garbage-collector]) + +(def identity-transforms + {:sample-> (fn sample-> ^double [v] v) + :->sample (fn ->sample ^double [v] v)}) + +(defn- batch-transforms + [batch-size] + (let [batch-size (double batch-size)] ; boxed to Double in closure + {:sample-> (fn sample-> ^double [v] + (/ (double v) (double batch-size))) + :->sample (fn ->sample ^double [v] + (* (double v) (double batch-size)))})) + +(defmethod impl/collect* :one-shot + ;; Collects a Single sample measured with no warmup of the measured function. + ;; Forces GC. + ;; Return a sampled data map. + [collect-plan collector measured] + (let [args (measured/args measured) + sample (collector/collect collector measured args 1)] + (collect/force-gc! (:max-gc-attempts collect-plan)) + {:samples + {:type :criterium/metrics-samples + :metrics-defs (:metrics-defs collector) + :metric->values (collect/sample-maps->map-of-samples + [sample] + (:metrics-defs collector)) + :transform identity-transforms + :batch-size 1 + :elapsed-time (metric/elapsed-time sample) + :eval-count 1 + :num-samples 1 + :expr-value (:expr-value sample)}})) + +(defn- collected-data-map + [collection-map] + (have + types/collected-metrics-map? + (let [metric->values (collect/transform collection-map) + batch-size (:batch-size collection-map)] + (merge + collection-map + {:metric->values metric->values + :metrics-defs (have (:metrics-defs (:collector collection-map))) + :expr-value (last (metric->values [:expr-value])) + :type :criterium/metrics-samples + :transform (if (= 1 batch-size) + identity-transforms + (batch-transforms batch-size))})))) + +(defmethod impl/collect* :with-jit-warmup + ;; Sample measured with estimation, warmup and forced GC. + ;; Return a sampled data map. + g[collect-plan collector measured] + {:pre [(fn? (:f collector)) + (measured/measured? measured)] + :post [(have? types/result-map? %)]} + (let [{:keys [^long batch-time-ns + ^long limit-time-ns + ^long max-gc-attempts + ^long thread-priority + ^long num-estimation-samples + ^long num-warmup-samples + ^long num-measure-samples + keep-estimation? + keep-warmup? + keep-final-gc?]} collect-plan] + + ;; Start by running GC. + (collect/force-gc-no-capture! 
max-gc-attempts) + + ;; First sample is always much longer than subsequent ones + (collect/throw-away-collection measured) + + (util/with-thread-priority thread-priority + (let [total-samples (+ num-estimation-samples + num-warmup-samples + num-measure-samples) + + t0 (collect/elapsed-time-point-estimate measured) + est-batch-size (collect/batch-size t0 batch-time-ns) + frac-est (double (/ num-estimation-samples total-samples)) + num-est-samples (min num-estimation-samples + (long (/ (* limit-time-ns frac-est) + (* t0 est-batch-size)))) + est-data (collect/elapsed-time-min-estimate + measured + num-est-samples + est-batch-size) + t1 (long (:t est-data)) + + warmup-batch-size (collect/batch-size t1 batch-time-ns) + remaining-samples (+ num-warmup-samples num-measure-samples) + remaining-time (- limit-time-ns (long (:total-time est-data)) t0) + batch-time (* t1 warmup-batch-size) + projected-time (* batch-time remaining-samples) + + [num-warmup-samples + num-measure-samples] (impl/limit-samples + limit-time-ns + num-warmup-samples + num-measure-samples + (:total-time est-data) + remaining-time + projected-time) + + _ (collect/force-gc! max-gc-attempts) + + warmup-data (collect/warmup + collector + measured + num-warmup-samples + warmup-batch-size) + + t2 (max + 1 + (double (/ (long (:min-time warmup-data)) + warmup-batch-size))) + batch-size (collect/batch-size t2 batch-time-ns) + + ;; Enter garbage Free zone + _ (collect/force-gc-no-capture! max-gc-attempts) + sample-data (collect/collect-arrays + collector measured batch-size num-measure-samples) + final-gc-data (collect/force-gc! max-gc-attempts) + ;; Leave garbage Free zone + ] + + (cond-> + {:samples (collected-data-map sample-data)} + keep-estimation? (assoc :estimation (collected-data-map est-data)) + keep-warmup? (assoc :warmup (collected-data-map warmup-data)) + keep-final-gc? (assoc :final-gc + (collected-data-map final-gc-data))))))) + +(defn collect + "Collect metrics from the measured according to the collect-plan. + Return a results-map." + [collect-plan collector measured] + (impl/collect* collect-plan collector measured)) diff --git a/bases/criterium/src/criterium/collect_plan/config.clj b/bases/criterium/src/criterium/collect_plan/config.clj new file mode 100644 index 0000000..9721dfe --- /dev/null +++ b/bases/criterium/src/criterium/collect_plan/config.clj @@ -0,0 +1,88 @@ +(ns criterium.collect-plan.config + (:require + [criterium.collect-plan :as collect-plan] + [criterium.util.invariant :refer [have?]] + [criterium.util.units :as units])) + +(def ^Long DEFAULT-BATCH-TIME-NS + ;; This value is a trade-off. + ;; - We want enough time to make system timestamp quantisation insignificant, + ;; - We want to limit the time to avoid the thread scheduler taking the thread + ;; during sampling. + ;; - JII compilation happens on the sample pipeline batch, so increasing the + ;; batch time requires longer to reach JIT compilation thresholds. + ;; + ;; The value is based on a timestamp granularity of ~30ns. + ;; On linux sched_min_granularity_ns is often around 10ms. + ;; + ;; If you see sine waves or saw tooth patterns in the sample times of very + ;; fast functions, then is this probably to this value being too low. + ;; https://en.wikipedia.org/wiki/Nyquist_frequency + (* 10 ^long units/MICROSEC-NS)) + +(def ^Long DEFAULT-LIMIT-TIME-NS + ;; This limit is so that, by default, we don't spend more time that what + ;; a casual user might be willing to spend. 
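+ ;;
+ ;; The budget can be raised per run. An illustrative call, assuming the
+ ;; collect-plan-config multimethod defined below in this namespace:
+ ;;
+ ;;   (collect-plan-config :with-jit-warmup
+ ;;                        {:limit-time-ns (* 60 ^long units/SEC-NS)})
+ ;;
+ ;; lifts the sampling budget to one minute.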
+ (* 10 ^long units/SEC-NS)) + +(def TARGET-ESTIMATION-SAMPLES 1000) +(def TARGET-WARMUP-SAMPLES 150000) +(def TARGET-SAMPLES 200) + +(defmulti collect-plan-config + (fn [collect-plan-id options] collect-plan-id)) + +(defmethod collect-plan-config :default + [collect-plan-id _] + (throw (ex-info "Unknown collect-plan" {:collect-plan collect-plan-id}))) + +(defmethod collect-plan-config :one-shot + [_collect-plan-id + {:keys [max-gc-attempts] + :as _options}] + {:scheme-type :one-shot + :max-gc-attempts (or max-gc-attempts 3)}) + +(defmethod collect-plan-config :with-jit-warmup + [_collect-plan-id + {:keys [num-estimation-samples + num-warmup-samples + num-measure-samples + max-gc-attempts + thread-priority + limit-time-ns + batch-time-ns] + :or {num-estimation-samples TARGET-ESTIMATION-SAMPLES + num-warmup-samples TARGET-WARMUP-SAMPLES + num-measure-samples TARGET-SAMPLES + limit-time-ns DEFAULT-LIMIT-TIME-NS + batch-time-ns DEFAULT-BATCH-TIME-NS + max-gc-attempts 6} + :as _options}] + {:scheme-type :with-jit-warmup + :batch-time-ns batch-time-ns + :max-gc-attempts max-gc-attempts + :thread-priority thread-priority + :limit-time-ns limit-time-ns + :num-estimation-samples num-estimation-samples + :num-warmup-samples num-warmup-samples + :num-measure-samples num-measure-samples}) + +(defmethod collect-plan-config :without-jit-warmup + [_collect-plan-id + options] + (collect-plan-config + :without-jit-warmup + (merge {:num-warmup-samples 0} options))) + +(defn ensure-pipeline-stages + "Add any injected stages that aren't already present." + [collect-plan-id collector-config] + {:pre [(have? keyword? collect-plan-id)]} + (let [stages (set (:stages collector-config)) + injected (collect-plan/required-stages collect-plan-id)] + (update + collector-config + :stages + (fnil into []) + (remove stages injected)))) diff --git a/bases/criterium/src/criterium/collect_plan/impl.clj b/bases/criterium/src/criterium/collect_plan/impl.clj new file mode 100644 index 0000000..7eee8a7 --- /dev/null +++ b/bases/criterium/src/criterium/collect_plan/impl.clj @@ -0,0 +1,35 @@ +(ns criterium.collect-plan.impl + (:require + [criterium.util.units :as units])) + +(defmulti required-stages* + "Pipeline stages required for the given schema-type" + (fn [scheme-type] scheme-type)) + +(defmulti collect* + #_{:clj-kondo/ignore [:unused-binding]} + (fn [collect-plan pipeline measured] + (:scheme-type collect-plan))) + +(defn limit-samples + [limit-time-ns + num-warmup-samples + num-measure-samples + taken-time + remaining-time + projected-time] + (if (> (long projected-time) (long remaining-time)) + (let [t (unchecked-add (long projected-time) (long taken-time)) + t-s (double (/ t (long units/SEC-NS))) + frac (/ (double remaining-time) (double projected-time))] + (println + (format + "Estimated time required for full JIT is %.3gs, but limited to %.3gs." + t-s + (double (/ (long limit-time-ns) (long units/SEC-NS))))) + (println (format " pass `:limit-time-s %.3g` to improve accuracy," t-s)) + (println " or consider benchmarks at a lower level.") + [(max 10 (long (* (long num-warmup-samples) frac))) + (max 10 (long (* (long num-measure-samples) frac)))]) + [(max 1 (long num-warmup-samples)) + (max 1 (long num-measure-samples))])) diff --git a/bases/criterium/src/criterium/collector.clj b/bases/criterium/src/criterium/collector.clj new file mode 100644 index 0000000..7da31c9 --- /dev/null +++ b/bases/criterium/src/criterium/collector.clj @@ -0,0 +1,85 @@ +(ns criterium.collector + "Metrics collector. 
+ + A metrics collector collects metrics associated with executing a + measured. + + A metrics collector is a pipeline is a pipeline with two stages. It + collects metrics into and array with an element for each metric, + without creating any allocation garbage. The array is then + transformed into a map, keyed by metric id. + + The `collect-array` function takes a measure, a measured state, and + an eval count. It returns an array of sample data. The array is + allocated once, and all objects allocated during sampling are recorded + in the array, in order to make the sample phase garbage free. + + The pipeline `transform` takes the sample array, and returns a sample + map. The transform is free to create garbage. + + A pipeline is specified via keywords, which specify sample metrics to + be collecteds and a pipeline terminal function, which is responsible + for actually calling the measured. + + Each sample function can collect data before and after the measured's + execution." + (:require + [criterium.collector.impl :as impl] + [criterium.collector.metrics :as metrics])) + +;;; Collector Pipeline Stages + +(defn stage? + [x] + (and (map? x) (fn? (:m x)) (fn? (:x x)) (keyword? (:id x)))) + +(defn terminal? + [x] + (and (stage? x) (= :terminal (-> x meta ::stage-type)))) + +(defn maybe-var-get-stage [x] + (impl/maybe-var-get-stage x)) + +;;; Collector Pipeline Construction + +(defn collector + "Build a metrics collector pipeline by specifying metric-ids. + + Returns a collector map, containing pipeline phase functions, :f + and :x, and :metrics-defs keys." + [collector-config] + (let [collector-config (impl/maybe-var-get-config collector-config)] + (-> collector-config + impl/pipeline* + (assoc :metrics-defs + (select-keys (metrics/metrics) + (impl/metric-ids collector-config)))))) + +;;; Collector Pipeline Execution + +(defn collect-array + "Collect a metrics array from measured, returning the array. + + Runs the measured eval-count times. + + Return an array with an element for the data collected by each metric + in the collector pipeline." + ^objects [{:keys [f length] :as _collector} measured measured-args eval-count] + (let [^objects collected (make-array Object length)] + (f collected measured measured-args eval-count 0) + collected)) + +(defn transform + "Transform the collection array into a metrics data map. + Return a map with a top level key for each metric in the collector." + [collector sample] + ((:x collector) sample 0) + (reduce merge {} sample)) + +(defn collect + "Collect metrics from measured, returning a metrics data map. + + Convenience function." + [collector measured measured-args eval-count] + (let [collected (collect-array collector measured measured-args eval-count)] + (transform collector collected))) diff --git a/bases/criterium/src/criterium/collector/fns.clj b/bases/criterium/src/criterium/collector/fns.clj new file mode 100644 index 0000000..edbfcbc --- /dev/null +++ b/bases/criterium/src/criterium/collector/fns.clj @@ -0,0 +1,281 @@ +(ns criterium.collector.fns + "A pipeline function takes a sample, a measured state, and a measured, + calls the next pipeline function and returns an updated sample state. + It is usually called via the execute function. + + A pipeline function can be composed with other pipeline functions and + a pipeline terminal function, which is responsible for actually + calling the measured. + + Each pipeline function collects one or metrics around the measured's + invocation." 
+ (:require + [criterium.jvm :as jvm] + [criterium.measured :as measured])) + +;;; Helpers + +(defn sample-gensym [] + (with-meta (gensym "sample") {:tag 'objects})) + +(defn- capture-syms [sample measured state eval-count result-index] + (let [sample-sym (sample-gensym) + measured-sym (gensym "measured") + state-sym (gensym "state") + eval-count-sym (gensym "eval-count") + result-index-sym (gensym "result-index")] + {:let-block (mapcat + #(vector %1 %2) + [sample-sym measured-sym state-sym + eval-count-sym result-index-sym] + [sample measured state eval-count result-index]) + :invoke-next (fn [next-fns] + ((first next-fns) + (rest next-fns) + sample-sym + measured-sym + state-sym + eval-count-sym + `(unchecked-inc ~result-index-sym))) + :sample-sym sample-sym + :measured-sym measured-sym + :state-sym state-sym + :eval-count-sym eval-count-sym + :result-index-sym result-index-sym})) + +(defrecord ^:private SampleStage + [m x id]) + +;;; Terminal function + +(defn- elapsed-time-sample-m + "A terminal function to execute measured, adding results to the sample. + + Puts: + - elapsed time in nanoseconds onto the :elapsed-time key in data. + - (an example of) the expression value on the :expr-value key. + - the number of evals on the :eval-count key." + [next-fns sample measured state eval-count result-index] + {:pre [(empty? next-fns)]} + (let [sample-sym (sample-gensym)] + `(let [~sample-sym ~sample + measured# ~measured + state# ~state + eval-count# ~eval-count + result-index# ~result-index] + (aset ~sample-sym result-index# + (measured/invoke measured# state# eval-count#))))) + +(defn- elapsed-time-xform + [sample ^long result-index] + (let [v (aget ^objects sample result-index)] + (aset ^objects sample result-index + {:elapsed-time (v 0) + :expr-value (v 1)}) + nil)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def elapsed-time + (with-meta + (->SampleStage elapsed-time-sample-m elapsed-time-xform :elapsed-time) + {:criterium.collector/stage-type :terminal})) + +;;; Sample Pipeline Stages + +;; Stages can be composed. 
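+
+ ;; An illustrative sketch of composing stages into a collector, assuming the
+ ;; criterium.collector and criterium.measured namespaces; the exact keys in
+ ;; the transformed sample depend on the stages chosen:
+ ;;
+ ;;   (require '[criterium.collector :as collector]
+ ;;            '[criterium.measured :as measured])
+ ;;
+ ;;   (let [c (collector/collector
+ ;;            {:stages     [:compilation :garbage-collector]
+ ;;             :terminator :elapsed-time})
+ ;;         m (measured/expr (reduce + (range 1000)))]
+ ;;     (collector/collect c m (measured/args m) 1))
+ ;;   ;; => {:compilation {...} :garbage-collector {...}
+ ;;   ;;     :elapsed-time <ns> :expr-value 499500}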
+ +;;;; Measured state + +(defn- measured-args-sample-m + [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym state-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym ~state-sym) + nil))) + +(defn- measured-args-xform + [next-fn] + (fn [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:args (aget ^objects sample result-index)}) + nil)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def measured-args + (->SampleStage measured-args-sample-m measured-args-xform :measured-args)) + +;;;; Class Loader + +(defn- class-loader-sample-m + [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block + start# (jvm/class-loader-counts)] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym [start# (jvm/class-loader-counts)]) + nil))) + +(defn- class-loader-xform + [next-fn] + (fn [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:class-loader + (apply jvm/class-loader-counts-change + (aget ^objects sample result-index))}))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def class-loader + (->SampleStage class-loader-sample-m class-loader-xform :class-loader)) + +;;;; Compilation + +(defn- compilation-sample-m + [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block + start# (jvm/compilation-sample)] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym [start# (jvm/compilation-sample)]) + nil))) + +(defn- compilation-xform + [next-fn] + (fn [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:compilation + (apply jvm/compilation-change + (aget ^objects sample result-index))}))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def compilation + (->SampleStage compilation-sample-m compilation-xform :compilation)) + +;;;; Memory + +(defn- memory-sample-m + "Execute measured, add compilation time to the data map. + + Adds a map to the :memory key in data. The map contains sub-maps for + each type of memory, and the total memory (on the :total key). Each + sub-map contains the :init, :committed, :max and :used keys. + + Uses the MemoryMXBean." + [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block + start# (jvm/memory-sample)] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym [start# (jvm/memory-sample)]) + nil))) + +(defn- memory-xform + [next-fn] + (fn [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:memory (apply jvm/memory-change + (aget ^objects sample result-index))}))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def memory + (->SampleStage memory-sample-m memory-xform :memory)) + +;;;; Finalization + +(defn- finalization-sample-m + "Execute measured, add pending finalization count to the data map. 
+ + Adds maps to the :finalization key in data, with the :start, :finish, + and :delta sub-keys. + + Uses the MemoryMXBean." + [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block + start# (jvm/finalization-sample)] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym + [start# (jvm/finalization-sample)]) + nil))) + +(defn- finalization-xform + [next-fn] + (fn finalization-xform [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:finalization + (apply jvm/finalization-change + (aget ^objects sample result-index))}))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def finalization + (->SampleStage finalization-sample-m finalization-xform :finalization)) + +;;;; Garbage-collector + +(defn- garbage-collector-sample-m + "Execute measured, add garbage collection counts and times to the data map. + + Uses the GarbageCollectorMXBean beans." + [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block + start# (jvm/garbage-collector-sample)] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym + [start# (jvm/garbage-collector-sample)]) + nil))) + +(defn- garbage-collector-xform + [next-fn] + (fn garbage-collector-xform + [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:garbage-collector + (apply jvm/garbage-collector-change + (aget ^objects sample result-index))}))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def garbage-collector + (->SampleStage + garbage-collector-sample-m garbage-collector-xform :garbage-collector)) + +;;;; Thread Memory Allocation + +(defn- thread-allocation-sample-m + "Collect sample with the thread memory allocation." 
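+ ;;
+ ;;   Like the other stages, the generated code brackets the measured call
+ ;;   with two jvm/thread-allocated-bytes readings; the xform below reduces
+ ;;   the [start finish] pair, e.g. with hypothetical readings:
+ ;;
+ ;;     (jvm/thread-allocated-change 1024 4096) ;=> 3072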
+ [next-fns sample measured state eval-count result-index] + (let [{:keys [let-block invoke-next sample-sym result-index-sym]} + (capture-syms sample measured state eval-count result-index)] + `(let [~@let-block + start# (jvm/thread-allocated-bytes)] + ~(invoke-next next-fns) + (aset ~sample-sym ~result-index-sym + [start# (jvm/thread-allocated-bytes)]) + nil))) + +(defn- thread-allocation-xform + [next-fn] + (fn thread-allocation-xform + [sample ^long result-index] + (next-fn sample (unchecked-inc result-index)) + (aset ^objects sample result-index + {:thread-allocation + (apply jvm/thread-allocated-change + (aget ^objects sample result-index))}))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def thread-allocation + (->SampleStage + thread-allocation-sample-m thread-allocation-xform :thread-allocation)) diff --git a/bases/criterium/src/criterium/collector/impl.clj b/bases/criterium/src/criterium/collector/impl.clj new file mode 100644 index 0000000..091d52b --- /dev/null +++ b/bases/criterium/src/criterium/collector/impl.clj @@ -0,0 +1,116 @@ +(ns criterium.collector.impl + (:require + [criterium.collector.fns :as fns] + [criterium.util.helpers :as util])) + +(defn maybe-var-get-stage [x] + (util/maybe-var-get x {:default-ns 'criterium.collector.fns})) + +(defn maybe-var-get-stages [x] + (mapv maybe-var-get-stage x)) + +(defn maybe-var-get-config + [config] + (-> config + (update :stages maybe-var-get-stages) + (update :terminator maybe-var-get-stage))) + +(defn metric-ids + "Return a sequence of all metrics produced by a pipeline with the + given pipeline config." + [{:keys [stages terminator] :as _pipline-config}] + {:pre [stages terminator]} + (mapv :id (conj stages terminator))) + +;;; Pipeline construction + +(defn pipeline-xform-fn + "Build a pipeline xform by specifying pipeline function keywords. + + Returns an xform function." + [{:keys [stages terminator] :as _collector-config}] + (let [terminal-fn (:x terminator)] + (when-not terminal-fn + (throw (ex-info "Unknown terminator function" + {:stages stages + :terminator terminator}))) + (reduce + (fn [pipeline stage] + (let [f (:x stage)] + (when-not f + (throw (ex-info "Unknown pipeline xform function" {:stage stage}))) + (f pipeline))) + terminal-fn + stages))) + +(defn- stage-m [stage] + (let [m (:m stage)] + (when-not m + (throw (ex-info "Unknown pipeline function m" {:stage stage}))) + m)) + +(defn pipeline-sample-fn* + "Build a pipeline sample function by specifying pipeline function keywords. + + Returns a function to collect a sample." + ;; We want a garbage free sample function. This means: + ;; can't use persistent collections + ;; => we need to index into a result collction + ;; => we need to use primitive math + ;; => we can't use function composition + ;; + ;; the result is using macros and eval. 
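+ ;;
+ ;; Roughly, for a config with no stages and the :elapsed-time terminator,
+ ;; the generated form looks like (a simplified, hypothetical rendering;
+ ;; the real code uses gensyms):
+ ;;
+ ;;   (fn sample [sample measured state eval-count result-index]
+ ;;     (aset sample result-index
+ ;;           (measured/invoke measured state eval-count)))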
+ [{:keys [stages terminator] :as collector-config}] + (let [terminal-m (:m terminator) + _ (when-not terminal-m + (throw + (ex-info + "Unknown terminator function" + {:collector-config collector-config}))) + stage-ms (conj + (mapv stage-m (reverse stages)) + terminal-m) + sample-sym (fns/sample-gensym) + measured-sym (gensym "measured") + state-sym (gensym "state") + eval-count-sym (gensym "eval-count") + ;; eval-count-sym (with-meta (gensym "eval-count") {:tag 'long}) + result-index-sym1 (gensym "result-index") + result-index-sym (with-meta (gensym "result-index") {:tag 'long})] + `(fn ~'sample + [~sample-sym + ~measured-sym + ~state-sym + ~eval-count-sym + ~result-index-sym1] + (let [~result-index-sym ~result-index-sym1] + ~((first stage-ms) + (rest stage-ms) + sample-sym + measured-sym + state-sym + eval-count-sym + result-index-sym))))) + +(defn pipeline-sample-fn + "Build a pipeline sample function by specifying pipeline function keywords. + + Returns a function to collect a sample." + [collector-config] + (binding [*compiler-options* + (merge *compiler-options* + {:direct-linking true})] + (eval (pipeline-sample-fn* collector-config)))) + +(defrecord SamplePipeline + [f x length]) + +(defn pipeline* + "Build a pipeline by specifying pipeline function keywords. + + Returns an updated state, adding :pipeline and :metrics keys." + [collector-config] + (map->SamplePipeline + {:f (pipeline-sample-fn collector-config) + :x (pipeline-xform-fn collector-config) + :length (inc (count (:stages collector-config 0)))})) diff --git a/bases/criterium/src/criterium/collector/metrics.clj b/bases/criterium/src/criterium/collector/metrics.clj new file mode 100644 index 0000000..761859e --- /dev/null +++ b/bases/criterium/src/criterium/collector/metrics.clj @@ -0,0 +1,92 @@ +(ns criterium.collector.metrics + (:require + [criterium.jvm :as jvm])) + +(defn metrics + "Return the default platform metrics configuration map." 
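+ ;; Each :path below is read from a transformed sample with get-in, e.g.
+ ;; (get-in sample [:memory :heap :used]); the 1e-9 :scale on elapsed-time
+ ;; corresponds to nanoseconds expressed as seconds.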
+ [] + {:elapsed-time + {:type :quantitative + :values [{:path [:elapsed-time] + :dimension :time + :scale 1e-9 + :type :quantitative + :label "Elapsed Time"} + {:path [:expr-value] + :dimension :fn-value + :scale 1 + :type :nominal + :label "Expr value"}]} + :memory + {:type :quantitative + :values [{:path [:memory :heap :used] + :dimension :memory + :scale 1 + :type :quantitative + :label "Heap Memory Used"} + {:path [:memory :non-heap :used] + :dimension :memory + :scale 1 + :type :quantitative + :label "Non-Heap Memory Used"} + {:path [:memory :total :used] + :dimension :memory + :scale 1 + :type :quantitative + :label "Total Memory Used"}]} + :thread-allocation + {:type :quantitative + :values [{:path [:thread-allocation] + :dimension :memory + :scale 1 + :type :quantitative + :label "Thread allocated memory"}]} + :class-loader + {:type :event + :label "ClassLoader" + :summary "%32s: loaded %s and unloaded %s classes in %s samples" + :values [{:path [:class-loader :loaded-count] + :dimension :count + :scale 1 + :type :event + :label "Num loaded classes"} + {:path [:class-loader :unloaded-count] + :dimension :count + :scale 1 + :type :event + :label "Num unloaded classes"}]} + :compilation + {:type :event + :label "JIT compilation" + :summary "%32s: ran for %s in %s samples" + :values [{:path [:compilation :time-ms] + :dimension :time + :scale 1e-3 + :type :event + :label "time"}]} + :garbage-collector + {:type :event + :label "Garbage Collector" + :groups ; the groups map has the layout of the top level map + (into + {} + (mapv + (fn [n k] + [k (hash-map + :label n + :summary "%32s: ran %s times for a total of %s in %s samples" + :values + [{:path [:garbage-collector k :time-ms] + :dimension :time + :scale 1e-3 + :label (str n " time") + :type :event + :group k} + {:path [:garbage-collector k :count] + :dimension :count + :scale 1 + :label (str n " count") + :type :event + :group k}])]) + (jvm/garbage-collector-names) + (jvm/garbage-collector-keywords)))}}) diff --git a/bases/criterium/src/criterium/collector_configs.clj b/bases/criterium/src/criterium/collector_configs.clj new file mode 100644 index 0000000..4aef1a9 --- /dev/null +++ b/bases/criterium/src/criterium/collector_configs.clj @@ -0,0 +1,6 @@ +(ns criterium.collector-configs + "Provide pre-configured collector configs") + +(def default-collector-config + {:stages [] + :terminator :elapsed-time}) diff --git a/bases/criterium/src/criterium/core.clj b/bases/criterium/src/criterium/core.clj new file mode 100644 index 0000000..5be2aff --- /dev/null +++ b/bases/criterium/src/criterium/core.clj @@ -0,0 +1,240 @@ +(ns ^{:author "Hugo Duncan" + :see-also + [["http://github.com/hugoduncan/criterium" "Source code"] + ["http://hugoduncan.github.com/criterium" "API Documentation"]]} + criterium.core + "Criterium measures the computation time of an expression. It is + designed to address some of the pitfalls of benchmarking, and benchmarking on + the JVM in particular. 
+ + This includes: + - statistical processing of multiple evaluations + - inclusion of a warm-up period, designed to allow the JIT compiler to + optimise its code + - purging of gc before testing, to isolate timings from GC state prior + to testing + - a final forced GC after testing to estimate impact of cleanup on the + timing results + + Usage: + (use 'criterium.core) + (bench (Thread/sleep 1000) :verbose) + (with-progress-reporting (bench (Thread/sleep 1000) :verbose)) + (report-result (benchmark (Thread/sleep 1000)) :verbose) + (report-result (quick-bench (Thread/sleep 1000))) + + References: + See http://www.ellipticgroup.com/html/benchmarkingArticle.html for a Java + benchmarking library. The accompanying article describes many of the JVM + benchmarking pitfalls. + + See http://hackage.haskell.org/package/criterion for a Haskell benchmarking + library that applies many of the same statistical techniques." + (:require + [clojure.set :as set] + [criterium.bench :as bench] + [criterium.benchmark :as benchmark] + [criterium.jvm :as jvm] + [criterium.measured :as measured] + [criterium.collector-configs :as collector-configs] + [criterium.collect-plan.config :as collect-plan-config])) + +;; Default values controlling behaviour + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def ^:dynamic *final-gc-problem-threshold* + "Fraction of excution time allowed for final cleanup before a + warning is issued." + 0.01) + +(def ^:const s-to-ns (* 1000 1000 1000)) ; in ns + +(def ^:dynamic *warmup-jit-period* + "Time period used to let the code run so that jit compiler can do + its work." + (* 10 s-to-ns)) ; in ns + +(def ^:dynamic *sample-count* + "Number of executions required" + 60) + +(def ^:dynamic *target-execution-time* + "Target elapsed time for execution for a single measurement." + (* 1 s-to-ns)) ; in ns + +(def ^:dynamic *max-gc-attempts* + "Maximum number of attempts to run finalisers and gc." + 100) + +(def ^:dynamic *default-benchmark-opts* + {:max-gc-attempts *max-gc-attempts* + :num-samples *sample-count* + :target-execution-time *target-execution-time* + :warmup-jit-period *warmup-jit-period* + :tail-quantile 0.025 + :bootstrap-size 1000}) + +(def ^:dynamic *default-quick-bench-opts* + {:max-gc-attempts *max-gc-attempts* + :num-samples (/ (long *sample-count*) 10) + :target-execution-time (/ (long *target-execution-time*) 10) + :warmup-jit-period (/ (long *warmup-jit-period*) 2) + :tail-quantile 0.025 + :bootstrap-size 500}) + +(defn options->time-config + [{:keys [max-gc-attempts target-execution-time warmup-jit-period] + :as options}] + {:collect-plan + (collect-plan-config/collect-plan-config + :with-jit-warmup + {:max-gc-attempts max-gc-attempts + :batch-time-ns target-execution-time + :warmup-period-ns warmup-jit-period}) + :collector-config collector-configs/default-collector-config + :analyse [:stats + :event-stats] + :view (into (filterv some? + [(when (:os options) + :os) + (when (:runtime options) + :runtime)]) + [:stats + :event-stats]) + :return-value [:samples :expr-value] + :viewer :print}) + +;;; Progress reporting + +(def ^:dynamic *report-progress* + "Flag to control output of progress messages" + nil) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn ^:skip-wiki progress + "Conditionally report progress to *out*." 
+ [& message] + (when *report-progress* + (apply println message))) + +(def ^:dynamic *report-debug* + "Flag to control output of debug messages" + nil) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn ^:skip-wiki debug + "Conditionally report debug to *out*." + [& message] + (when *report-debug* + (apply println message))) + +(def ^:dynamic *report-warn* + "Flag to control output of warning messages" + nil) + +(defn ^:skip-wiki warn + "Conditionally report warn to *out*." + [& message] + (when *report-warn* + (apply println "WARNING:" message))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmacro with-progress-reporting + "Macro to enable progress reporting during the benchmark." + [expr] + `(binding [*report-progress* true] + ~expr)) + +;;; Overhead estimation + +(declare benchmark*) + +;;; Options + +(defn extract-report-options + "Extract reporting options from the given options vector. Returns a two + element vector containing the reporting options followed by the non-reporting + options" + [opts] + (let [known-options #{:os :runtime :verbose} + option-set (set opts)] + (into + (vec + (interleave + (set/intersection known-options option-set) + (repeat true))) + (remove #(contains? known-options %1) opts)))) + +;;; Sample statistic + +(defn warn-on-suspicious-jvm-options + "Warn if the JIT options are suspicious looking." + [] + (let [compiler (jvm/jit-name) + {:keys [input-arguments]} (jvm/runtime-details)] + (when-let [arg (and (re-find #"Tiered" compiler) + (some #(re-find #"TieredStopAtLevel=(.*)" %) + input-arguments))] + (warn + "JVM argument" (first arg) "is active," + "and may lead to unexpected results as JIT C2 compiler may not be active." + "See http://www.slideshare.net/CharlesNutter/javaone-2012-jvm-jit-for-dummies.")))) + +;;; User top level functions + +(defn benchmark* + "Benchmark a function. This tries its best to eliminate sources of error. + This also means that it runs for a while. It will typically take 70s for a + fast test expression (less than 1s run time) or 10s plus 60 run times for + longer running expressions." + [measured + {:keys [supress-jvm-option-warnings] + :as options}] + {:pre [(measured/measured? measured)]} + (when-not supress-jvm-option-warnings + (warn-on-suspicious-jvm-options)) + (let [opts (merge *default-benchmark-opts* options) + config (options->time-config opts)] + (bench/bench-measured config measured))) + +(defmacro ^:deprecated benchmark + "Benchmark an expression. This tries its best to eliminate sources of error. + This also means that it runs for a while. It will typically take 70s for a + fast test expression (less than 1s run time) or 10s plus 60 run times for + longer running expressions." + [expr options] + `(benchmark* (measured/expr ~expr) ~options)) + +(defn quick-benchmark* + "Benchmark an expression. Less rigorous benchmark (higher uncertainty)." + [f {:as options}] + (benchmark* f (merge *default-quick-bench-opts* options))) + +(defmacro ^:deprecated quick-benchmark + "Benchmark an expression. Less rigorous benchmark (higher uncertainty)." + [expr options] + `(quick-benchmark* (measured/expr ~expr) ~options)) + +;;; All in one invocations + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var :deprecated-var]} +(defmacro bench + "Convenience macro for benchmarking an expression, expr. Results are reported + to *out* in human readable format. Options for report format are: :os, + :runtime, and :verbose." 
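+ ;; An illustrative expansion:
+ ;;   (bench (Thread/sleep 10) :os)
+ ;;   ;; expands to roughly (benchmark (Thread/sleep 10) {:os true})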
+ [expr & opts] + (let [options (extract-report-options opts)] + `(benchmark + ~expr + ~(when (seq options) (apply hash-map options))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var :deprecated-var]} +(defmacro quick-bench + "Convenience macro for benchmarking an expression, expr. Results are reported + to *out* in human readable format. Options for report format are: :os, + :runtime, and :verbose." + [expr & opts] + (let [options (extract-report-options opts)] + `(quick-benchmark + ~expr + ~(when (seq options) (apply hash-map options))))) diff --git a/bases/criterium/src/criterium/instrument.clj b/bases/criterium/src/criterium/instrument.clj new file mode 100644 index 0000000..4481dd0 --- /dev/null +++ b/bases/criterium/src/criterium/instrument.clj @@ -0,0 +1,73 @@ +(ns criterium.instrument + "Instrumentation facilities for collecting performance samples from functions. + + This namespace provides tools for measuring function performance + during normal execution, outside of criterium's direct control. It + works by wrapping functions with instrumentation code that collects + timing data while preserving the original function behavior. + + Key features: + - Non-intrusive function wrapping that maintains original behavior + - Automatic sample collection during function execution + - Safe metadata management for storing/restoring original functions + - Integration with criterium's analysis pipeline + + Example usage: + ```clojure + (with-instrumentation [my-fn collector-config] + (some-code + (my-fn args))) + ``` + + The instrumentation can also be manually controlled using + instrument!/uninstrument! for more fine-grained control over the + scope which is sampled." + (:refer-clojure :exclude [reset!]) + (:require + [criterium.collector :as collector] + [criterium.instrument-fn :as instrument-fn])) + +(def ^:private original-f ::original-f) + +(defn instrument! + "Add instrumentation to the var, v, for performance sampling. + + Takes a var and a collector configuration, wraps the function to + collect timing samples during execution while preserving the original + function behavior. The instrumentation stores the original function + and sample data in the var's metadata. + + This function is idempotent - calling it multiple times on the same var + will only instrument it once. + + You must use uninstrument! to remove the instrumentation and restore + the original function. + + Parameters: + v - The var to instrument (e.g. #'my-namespace/my-function) + collector-config - A collector pipeline configuration that defines + how samples are processed + + Side effects: + - Modifies the var's root binding to install the instrumented function + - Adds metadata to track the original function and store samples" + [v collector-config] + (when-not (original-f (meta v)) + (let [inst-fn (instrument-fn/instrument-fn @v collector-config)] + (alter-meta! v assoc original-f @v) + (alter-var-root v (constantly inst-fn))))) + +(defn uninstrument! + "Remove instrumentation from the var, v and restore original function. + + Reverses the effects of instrument! by: + - Restoring the original function as the var's root binding + - Removing tracking metadata added during instrumentation + + This function is idempotent - calling it multiple times on the same var + is safe and will only uninstrument once. Safe to call on vars that + aren't instrumented." + [v] + (when-let [f (original-f (meta v))] + (alter-var-root v (constantly f)) + (alter-meta! 
v dissoc original-f))) diff --git a/bases/criterium/src/criterium/instrument_fn.clj b/bases/criterium/src/criterium/instrument_fn.clj new file mode 100644 index 0000000..1ce57db --- /dev/null +++ b/bases/criterium/src/criterium/instrument_fn.clj @@ -0,0 +1,159 @@ +(ns criterium.instrument-fn + "First-class function instrumentation for performance sampling. + + Provides functionality for wrapping functions with instrumentation code + that collects performance data during execution. The instrumented functions + are first-class objects that maintain their own sample collection state." + (:require + [criterium.collect :as collect] + [criterium.collect-plan :as collect-plan] + [criterium.collector :as collector] + [criterium.jvm :as jvm] + [criterium.measured :as measured] + [criterium.sampler :as sampler] + [criterium.util.invariant :refer [have]]) + (:import + [java.util.concurrent Callable])) + +(defn- measured [original-fn] + (measured/measured + (fn state-f [] (assert false)) + (fn measured-f [args ^long eval-count] + (have (partial = 1) eval-count) + (let [start (jvm/timestamp) + res (apply original-fn args) + finish (jvm/timestamp)] + [(unchecked-subtract finish start) res])))) + +(defmacro ^:private invoke-f + [collector measured args] + `(let [sample# (collector/collect ~collector ~measured ~args 1)] + (set! ~'samples (conj ~'samples sample#)) + (:expr-value sample#))) + +(defn- sample-map + "Convert raw samples into an analyzable sample map structure. + + Takes collected samples and metrics configurations and produces a map + in the format expected by criterium's analysis functions." + [metrics-defs samples] + {:type :criterium/metrics-samples + :metric->values (collect/sample-maps->map-of-samples + samples + metrics-defs) + :transform collect-plan/identity-transforms + :batch-size 1 + :eval-count (count samples) + :num-samples (count samples) + :metrics-defs (have metrics-defs) + :expr-value nil + :source-id nil}) + +(deftype InstrumentedFn + [original-fn + collector + measured + ^:volatile-mutable samples] + sampler/Sampler + (samples-map [_] (sample-map (:metrics-defs collector) samples)) + (reset-samples! [_] (set! 
samples []) nil) + + clojure.lang.IFn + (invoke [this] + (invoke-f collector measured [])) + (invoke [this a1] + (invoke-f collector measured [a1])) + (invoke [this a1 a2] + (invoke-f collector measured [a1 a2])) + (invoke [this a1 a2 a3] + (invoke-f collector measured [a1 a2 a3])) + (invoke [this a1 a2 a3 a4] + (invoke-f collector measured [a1 a2 a3 a4])) + (invoke [this a1 a2 a3 a4 a5] + (invoke-f collector measured [a1 a2 a3 a4 a5])) + (invoke [this a1 a2 a3 a4 a5 a6] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6])) + (invoke [this a1 a2 a3 a4 a5 a6 a7] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7 a8])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7 a8 a9])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13] + (invoke-f collector measured [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18])) + (invoke + [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19])) + (invoke + [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19 a20] + (invoke-f + collector + measured + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19 a20])) + (applyTo [this args] + (invoke-f collector measured args)) + + Runnable + (run [this] + (invoke-f collector measured [])) + + Callable + (call [this] + (invoke-f collector measured []))) + +(defn instrument-fn + "Create an instrumented wrapper of function f. + + Takes a function and a collector configuration and returns a new + function that wraps the original while collecting timing samples + during execution. The returned function implements IFn, Runnable, and + Callable interfaces. + + Parameters: + f - The function to instrument + collector-config - A collector configuration that defines how samples + are processed + + Returns: + An InstrumentedFn instance that wraps the original function and maintains + its own sample collection state." 
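+ ;;
+ ;; A hypothetical REPL sketch, using an elapsed-time-only collector config:
+ ;;
+ ;;   (def timed-slurp
+ ;;     (instrument-fn slurp {:stages [] :terminator :elapsed-time}))
+ ;;   (timed-slurp "deps.edn")            ; behaves like slurp
+ ;;   (sampler/samples-map timed-slurp)   ; accumulated timing samples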
+ [f collector-config] + (->InstrumentedFn + f + (collector/collector collector-config) + (measured f) + [])) diff --git a/bases/criterium/src/criterium/jvm.clj b/bases/criterium/src/criterium/jvm.clj new file mode 100644 index 0000000..25aa1e0 --- /dev/null +++ b/bases/criterium/src/criterium/jvm.clj @@ -0,0 +1,776 @@ +(ns criterium.jvm + "JVM monitoring and management interface. + + Provides zero-garbage access to JVM metrics and controls via JMX + management beans. + + Core capabilities include: + + Time Management + Memory Management + Thread Management + JMX Bean Access + + Key design principles: + - Zero garbage sampling methods for performance measurement + - Thread-safe monitoring capabilities + - Consistent snapshot semantics + - High-precision timing functions + + Performance characteristics: + - Sampling functions avoid allocation + - Low-overhead monitoring options + - Batch collection capabilities + + Usage notes: + - Use -sample variants for time series collection + - Monitor allocation in performance-sensitive code + - Verify timing precision requirements" + (:require + [criterium.jvm.impl :as impl])) + +;;; Elapsed time measurement + +(defmacro timestamp + "Return the current value of the JVM's high-resolution time source. + + The value is in nanoseconds. but does not necessarily have nanosecond + resolution. The actual resolution depends on the JVM and OS. + + The value can only be used meaningfully by passing it to elapsed-time. + Direct subtraction will fail for intervals including a clock wrap-around. + + Performance: + - Zero allocation + + Returns: + long - The current timestamp in nanoseconds" + [] `(System/nanoTime)) + +(defn elapsed-time + "Return the elapsed nanoseconds between two nanosecond timestamps. + + Safely handles clock wrap-around that may occur between timestamps. + + Parameters: + first-timestamp - The earlier timestamp in nanoseconds + last-timestamp - The later timestamp in nanoseconds + + Performance: + - Zero allocation + - Uses primitive arithmetic + + Returns: + long - The elapsed time in nanoseconds, which may be negative if + last-timestamp is before first-timestamp" + ^long [^long first-timestamp ^long last-timestamp] + (unchecked-subtract last-timestamp first-timestamp)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn wait + "Busy-wait for a specified number of nanoseconds. + + Spins in a tight loop checking elapsed time until the requested + interval has passed. Uses high-resolution timer for precise delays. + + This is a CPU-intensive operation that: + - Does not release the thread + - Does not allow other threads to run + - Consumes CPU continuously + + Parameters: + ns - Time to wait in nanoseconds + + Performance: + - Zero garbage allocation + - ~10 microsecond minimum practical resolution + - High CPU usage during wait + - May be affected by OS scheduling + + Threading: + - Blocks only the calling thread + - Other threads continue normally + - Timer interrupts still occur + + Returns: + nil - Avoids object allocation for benchmarking + + Notes: + Intended for criterium internal testing only. + Not recommended for production timing control." + [^long ns] + (let [start (timestamp)] + (loop [] + (when (< (elapsed-time start (timestamp)) ns) + (recur))))) + +;;; GC control + +(defn run-finalization! + "Run the finalization methods of any objects pending finalization. + + Request that the JVM run finalization methods of objects pending + finalization. 
On return, the JVM has made a best effort to complete + all outstanding finalizations. + + This is a hint to the JVM, not a guarantee. Some finalizers may still + be pending after the call returns. + + Performance: + - Execution time varies with number of pending finalizations + - May block calling thread + - Should not be called in performance-sensitive code + + Threading: + - May temporarily impact performance of other threads" + [] + (System/runFinalization)) + +(defn force-gc! + "Run the garbage collector. + + Suggest the JVM expend effort to collect unused objects. On return, + the JVM has made a best effort to reclaim space from all unreferenced + objects. + + This is a hint to the JVM, not a guarantee. The JVM may: + - Choose to ignore the request + - Delay collection until later + - Perform only a partial collection + + Performance: + - Execution time varies with heap size and object count + - Will temporarily impact performance of other threads + - Should not be called in performance-sensitive code + + Threading: + - Will impact performance of all threads" + [] + (System/gc)) + +(defn run-finalization-and-force-gc! + "Run object finalization and then force GC. + This cleans up memory. + Repeated invocation may free up more memory." + [] + (run-finalization!) + (force-gc!)) + +;;; ClassLoadingMXBean + +(defn class-loader-counts + "Return a map of class loader statistics from the JVM's ClassLoadingMXBean. + + Return counts of classes loaded and unloaded since JVM start. + These counts are cumulative and monotonically increasing. + + Return value contains: + :loaded-count - Total number of classes loaded (long) + :unloaded-count - Total number of classes unloaded (long) + + Performance: + - Zero garbage when used with class-loader-counts-change + - Safe for frequent sampling + - Negligible CPU overhead + + Threading: + - Consistent snapshot of both counts" + [] + (impl/class-loader-counts)) + +(defn class-loader-counts-change + "Return a JvmClassLoaderState record with loaded class counts. + + Contains :loaded-count and :unloaded-count fields. + + Satisfies the StateChanged protocol, providing the state-changed? + and state-delta methods. + + These are counts since the start of the JVM." + [first-sample last-sample] + (impl/class-loader-counts-change first-sample last-sample)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn set-verbose-classloading + "Set whether the classloader is verbose or not." + [flag] + (impl/set-verbose-classloading flag)) + +;;; CompilationMXBean + +(defn compilation-sample + "Return the total JIT compilation time for the JVM instance. + + Provides a zero-garbage sample of the cumulative time spent in JIT + compilation. This value monotonically increases while the JVM is + running. + + Performance: + - Zero garbage allocation + - Safe for frequent sampling + - Negligible CPU overhead + + Threading: + - Atomic read of compilation time + + Returns: + long - Total compilation time in milliseconds, or -1 if unsupported" + ^long [] + (impl/compilation-sample)) + +(defn compilation + "Returns a compilation time map for the JVM instance in ms. + + The :time-ms key will contain the total compilation time, or -1 if + unsupported." + ([] (impl/compilation (impl/compilation-sample))) + ([sample] (impl/compilation sample))) + +(defn compilation-change + [^long first-sample ^long last-sample] + (impl/compilation-change first-sample last-sample)) + +(defn jit-name + "Returns the name of the JIT compiler." 
+ [] + (impl/jit-name)) + +;;; MemoryMXBean + +(defn finalization-sample + "Return the pending finalization count for the JVM instance." + ^long [] + (impl/finalization-sample)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn finalization + "Return the pending finalization count map for the JVM instance." + ([] (impl/finalization (impl/finalization-sample))) + ([^long finalization-sample] (impl/finalization finalization-sample))) + +(defn finalization-change + "Return the change in pending finalization count for the JVM instance." + [^long first-sample ^long last-sample] + (impl/finalization-change first-sample last-sample)) + +(defn memory-sample + "Return a sample of the memory usage from the MemoryMXBean. + + Captures current heap and non-heap memory usage statistics. + The sample can be passed to memory or memory-change functions + for detailed analysis. + + Performance: + - Zero garbage allocation + - Safe for frequent sampling + - Negligible CPU overhead + + Threading: + - Provides consistent snapshot of memory state + + Returns: + Implementation specific opaque value suitable for memory and memory-change" + [] + (impl/memory-sample)) + +(defn memory + "Return a map of memory usage statistics for the JVM instance. + + Takes an optional memory-sample. If not provided, takes a new sample. + Use with memory-sample when tracking changes over time. + + Return value contains: + :heap - Current heap memory usage in bytes + :non-heap - Current non-heap memory usage in bytes + Each contains: + :committed - Amount of memory guaranteed to be available + :init - Initial amount of memory requested + :max - Maximum amount of memory available + :used - Amount of memory currently used + + Performance: + - Allocates return map + - Use memory-sample for zero-garbage time series + + Threading: + - Values are consistent snapshot at sample time" + ([] (impl/memory (impl/memory-sample))) + ([memory-sample] (impl/memory memory-sample))) + +(defn memory-change + "Return a map of the change in memory usage between first and last samples." + [first-memory-sample last-memory-sample] + (impl/memory-change first-memory-sample last-memory-sample)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn set-memory-verbose! + "Set whether the memory collection system emits verbose output." + [flag] + (impl/set-memory-verbose! flag)) + +;;; MemoryPoolMXBeans + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn memory-pool-names + "Return the names of the JVM's memory pools." + [] + (impl/memory-pool-names)) + +(defn memory-pools-sample + "Return a sample of all memory pool usage statistics. + + Captures usage statistics for all JVM memory pools including: + - Heap pools (Eden, Survivor, Old/Tenured) + - Non-heap pools (Metaspace, CodeCache) + + Performance: + - Zero garbage allocation + - Safe for frequent sampling + - Negligible CPU overhead + + Threading: + - Provides consistent snapshot across all pools + + Returns: + Implementation specific opaque value for use with memory-pools + or memory-pools-change functions" + [] + (impl/memory-pools-sample)) + +(defn memory-pools + "Return a map of memory pool usage statistics. + + Takes an optional memory-pools-sample. If not provided, takes a + new sample. Use with memory-pools-sample for efficient time + series collection. 
+ + Return value is a map of pool-keyword to statistics: + :init - Initial size in bytes + :used - Currently used bytes + :committed - Guaranteed available bytes + :max - Maximum possible bytes or -1 if undefined + + Performance: + - Allocates return map + - Use memory-pools-sample for zero-garbage time series + + Threading: + - Values are consistent snapshot at sample time" + ([] + (impl/memory-pools (memory-pools-sample))) + ([memory-pools-sample] + (impl/memory-pools memory-pools-sample))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn memory-pools-change + "Return a map of the difference between two memory pool samples." + [first-sample last-sample] + (impl/memory-pools-change first-sample last-sample)) + +;;; GarbageCollectorMXBeans + +(defn garbage-collector-names + "Return the names of all garbage collectors in the JVM. + + The names are implementation dependent strings that identify + the garbage collectors configured for this JVM instance. + + Performance: + - Allocates string array + - Result can be cached as collectors don't change + - Not recommended for frequent calls + + Returns: + seq of strings - Names of all garbage collectors" + [] + (impl/garbage-collector-names)) + +(defn garbage-collector-keywords + "Return the JVM garbage collector keywords." + [] + impl/garbage-collector-keywords) + +(defn garbage-collector-sample + "Return a sample of garbage collector statistics. + + Captures the cumulative collection counts and times from all GC beans. + Values monotonically increase during JVM lifetime. + + Performance: + - Zero garbage allocation + - Safe for frequent sampling + - Negligible CPU overhead + + Threading: + - Provides consistent snapshot across all collectors + + Returns: + Implementation specific opaque value for use with garbage-collector + or garbage-collector-change functions" + [] + (impl/garbage-collector-sample)) + +(defn garbage-collector + "Return a map of garbage collector statistics. + + Takes an optional garbage-collector-sample. If not provided, + takes a new sample. Use with garbage-collector-sample for + efficient time series collection. + + Return value contains map of collector-keyword to statistics: + :count - Number of collections (long) + :time-ms - Total collection time in ms (long) + + Performance: + - Allocates return map + - Use garbage-collector-sample for zero-garbage time series + + Threading: + - Values are consistent snapshot at sample time" + ([] (impl/garbage-collector (garbage-collector-sample))) + ([sample] (impl/garbage-collector sample))) + +(defn garbage-collector-change + [first-sample last-sample] + (impl/garbage-collector-change first-sample last-sample)) + +;;; ThreadMXBean + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn thread-ids + "Return thread ID's for all threads" + [] + (impl/thread-ids)) + +(defn current-thread-id + "Return the Thread ID of the current thread. + + This ID is unique within a single JVM session but may be reused + after thread termination in a long-running process. + + Performance: + - Zero garbage + - Very low overhead + - Safe for frequent calls + + Threading: + - Each thread sees its own ID + + Returns: + long - Unique identifier for the current thread" + ^long [] + (impl/current-thread-id)) + +(defn thread-sample + "Return a zero garbage sample of the thread, + Defaults to the current thread." 
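+ ;;
+ ;;   Pair two samples with thread-change to inspect what a stretch of code
+ ;;   did; an illustrative sketch:
+ ;;
+ ;;     (let [s0 (thread-sample)]
+ ;;       (dotimes [_ 1000] (str "x"))
+ ;;       (thread-change s0 (thread-sample)))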
+  ([] (thread-sample (current-thread-id)))
+  ([^long thread-id] (impl/thread-sample thread-id)))
+
+(defn thread
+  "Return a map of data on the thread.
+  Defaults to the current thread."
+  ([] (impl/thread (thread-sample (current-thread-id))))
+  ([sample] (impl/thread sample)))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn thread-change
+  "Return a map of data on the difference between two thread samples."
+  [first-sample last-sample]
+  (impl/thread-change first-sample last-sample))
+
+(defn thread-cpu-time
+  "Return the total CPU time consumed by a thread in nanoseconds.
+
+  Returns the total CPU time for a given thread ID or the current thread
+  if no ID is provided. Includes both user and system time.
+
+  The value is monotonically increasing for a given thread.
+  May return -1 if CPU time measurement is disabled.
+
+  Parameters:
+    id - Optional thread ID (defaults to current thread)
+
+  Performance:
+  - Zero garbage
+  - Low overhead when enabled
+  - Safe for frequent sampling
+
+  Threading:
+  - Minimal impact on target thread
+
+  Returns:
+    long - Nanoseconds of CPU time used, or -1 if unsupported"
+  (^long [] (impl/thread-cpu-time (current-thread-id)))
+  (^long [id] (impl/thread-cpu-time id)))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn thread-user-time
+  "Return a sample of the total user time consumed by a thread.
+  Defaults to the current thread."
+  (^long [] (impl/thread-user-time (current-thread-id)))
+  (^long [id] (impl/thread-user-time id)))
+
+(defn thread-allocated-bytes
+  "Return the total bytes allocated by a thread.
+
+  Returns cumulative bytes allocated by given thread ID or current thread
+  if no ID provided. This is a monotonically increasing value that
+  includes both live and garbage collected objects.
+
+  Parameters:
+    id - Optional thread ID (defaults to current thread)
+
+  Performance:
+  - Zero garbage
+  - Low overhead when enabled
+  - Safe for frequent sampling
+
+  Threading:
+  - Each thread's allocations tracked independently
+
+  Returns:
+    long - Total bytes allocated by the thread, or -1 if unsupported"
+  (^long [] (impl/thread-allocated-bytes (current-thread-id)))
+  (^long [id] (impl/thread-allocated-bytes id)))
+
+(defn thread-allocated-change
+  "Calculate the bytes allocated between two thread allocation samples.
+
+  Takes two samples from thread-allocated-bytes and returns the
+  difference; no adjustment is made for counter wrap-around.
+
+  Parameters:
+    first-sample - Earlier allocation sample
+    last-sample - Later allocation sample
+
+  Performance:
+  - Zero garbage
+  - Uses primitive arithmetic
+
+  Returns:
+    long - Bytes allocated between samples, may be negative if
+           last-sample is before first-sample"
+  ^long [^long first-sample ^long last-sample]
+  (- last-sample first-sample))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn set-thread-contention-monitoring-enabled [flag]
+  (impl/set-thread-contention-monitoring-enabled flag))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn set-thread-cpu-time-enabled [flag]
+  (impl/set-thread-cpu-time-enabled flag))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defmacro allocated-bytes
+  "Measure bytes allocated during execution of body expressions.
+
+  Captures thread allocation before and after executing the body.
+  Returns a vector containing the bytes allocated and the body's result.
+
+  The allocation measurement includes both live objects and garbage.
+  Thread allocation tracking must be enabled for accurate results.
+
+  Performance:
+  - Two allocation tracking calls
+  - Body executes exactly once
+  - Minimal overhead around body
+
+  Returns:
+    [long, any] - Vector containing:
+    - Bytes allocated during body execution (may be negative)
+    - Result of body evaluation"
+  [& body]
+  `(let [i# (thread-allocated-bytes)
+         res# (do ~@body)]
+     [(unchecked-subtract (thread-allocated-bytes) i#) res#]))
+
+;;; keyword based invocation
+
+(def metric-fns
+  {:class-loader-counts class-loader-counts
+   :compilation compilation
+   :garbage-collector garbage-collector
+   :memory memory
+   :memory-pools memory-pools
+   :thread thread})
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(def metric-sample-fns
+  {:class-loader-counts class-loader-counts
+   :compilation compilation-sample
+   :garbage-collector garbage-collector-sample
+   :memory memory-sample
+   :memory-pools memory-pools-sample
+   :thread thread-sample})
+
+(defn collect-metric
+  "Collect a single metric by key from the JVM management beans.
+
+  Takes a keyword identifying the metric to collect. Valid keys are:
+    :class-loader-counts - Class loading statistics
+    :compilation - JIT compilation statistics
+    :garbage-collector - GC statistics
+    :memory - Memory usage statistics
+    :memory-pools - Detailed memory pool statistics
+    :thread - Thread statistics
+
+  Performance:
+  - Allocates return value
+  - May trigger JMX bean creation
+  - Use metric-sample-fns for zero-garbage time series
+
+  Returns:
+    map - Metric data structure specific to the requested metric"
+  [k]
+  ((metric-fns k)))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn collect-metric-sample [k]
+  ((metric-sample-fns k)))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn collect
+  "Collect multiple metrics from JVM management beans.
+
+  Takes a sequence of metric keywords and returns a map of metric data.
+  See collect-metric for valid metric keywords.
+
+  Parameters:
+    kws - Sequence of metric keywords to collect
+
+  Performance:
+  - Allocates return map and intermediate structures
+  - Makes multiple JMX calls
+  - Consider using sample functions for time series data
+
+  Returns:
+    map - Mapping of metric keywords to their current values"
+  [kws]
+  (zipmap kws (map collect-metric kws)))
+
+;; (defn collect-diff
+;;   "Collect the difference to the metrics in the given metric map."
+;;   [ms]
+;;   (reduce-kv
+;;    (fn [result k v]
+;;      (assoc result k (util/diff (collect-metric k) v)))
+;;    {}
+;;    ms))
+
+;;; OperatingSystemMXBean
+
+(defn os-details
+  "Return information about the operating system.
+
+  Provides static information about the OS where the JVM is running.
+  This information does not change during JVM lifetime.
+
+  Return value contains:
+    :name - Operating system name
+    :version - OS version string
+    :arch - OS architecture
+    :available-processors - Number of available processors
+
+  Performance:
+  - Allocates return map
+  - Result can be cached as values don't change
+  - Not recommended for frequent calls
+
+  Returns:
+    map - Operating system information"
+  []
+  (impl/os-details))
+
+;;; RuntimeMXBean
+
+(defn runtime-details
+  "Return information about the Java runtime environment.
+
+  Provides static information about the JVM instance.
+  Most values do not change during JVM lifetime.
+ + Return value contains: + :vm-name - JVM name (eg 'Java HotSpot(TM) 64-Bit Server VM') + :vm-vendor - JVM vendor + :vm-version - JVM version + :java-version - JDK version + :java-runtime-version - JDK runtime version + :input-arguments - List of JVM arguments + + Performance: + - Allocates return map + - Most values can be cached + - Not recommended for frequent calls + + Returns: + map - Java runtime information" + [] + (impl/runtime-details)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn system-properties + "Return the operating system details." + [] + (impl/system-properties)) + +;;; Memory reporting + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn heap-used + "Return the current heap memory usage in bytes. + + Provides a quick but potentially inconsistent view of heap usage. + The value may be incorrect if memory usage changes during measurement. + + This is a lightweight alternative to memory-sample when: + - Approximate values are acceptable + - Minimal overhead is required + - Consistency guarantees aren't needed + + Performance: + - Minimal overhead + - Makes two JVM calls + - May allocate on some JVMs + + Threading: + - Thread safe + - No consistency guarantee between measurements + + Returns: + long - Approximate bytes of heap currently in use" + [] + (let [runtime (Runtime/getRuntime)] + (- (.totalMemory runtime) (.freeMemory runtime)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn runtime-memory + "Return a snapshot of JVM memory metrics. + + Provides a quick but potentially inconsistent view of memory usage. + Values may be inconsistent if memory changes during collection. + + Return value contains: + :free - Currently unused memory in bytes + :total - Current heap size in bytes + :max - Maximum possible heap size in bytes + + Performance: + - Makes three JVM calls + - Allocates return map + - Lighter weight than memory-sample + + Threading: + - Thread safe + - No consistency guarantee between values + + Returns: + map - Basic JVM memory statistics" + [] + (let [runtime (Runtime/getRuntime)] + {:free (.freeMemory runtime) + :total (.totalMemory runtime) + :max (.maxMemory runtime)})) diff --git a/bases/criterium/src/criterium/jvm/impl.clj b/bases/criterium/src/criterium/jvm/impl.clj new file mode 100644 index 0000000..b76388e --- /dev/null +++ b/bases/criterium/src/criterium/jvm/impl.clj @@ -0,0 +1,297 @@ +(ns criterium.jvm.impl + (:require + [clojure.string :as str] + [criterium.util.helpers :as util]) + (:import + [java.lang.management + GarbageCollectorMXBean + ManagementFactory + MemoryPoolMXBean + MemoryUsage + ThreadInfo])) + +;;; Utils + +(defn- val-sum + ([] {}) + ([a b] (merge-with + a b))) + +(defn- val-diff + ([] {}) + ([a b] (merge-with - a b))) + +(defn- name->keyword [n] + (-> n + str/lower-case + (str/replace \space \-) + keyword)) + +;;; ClassLoadingMXBean + +(let [bean (.. ManagementFactory getClassLoadingMXBean)] + (defn class-loader-counts + [] + {:loaded-count (. bean getTotalLoadedClassCount) + :unloaded-count (. bean getUnloadedClassCount)}) + + (defn set-verbose-classloading + "Set whether the classloader is verbose or not." + [flag] + (. 
bean setVerbose (boolean flag))))
+
+(defn class-loader-counts-change
+  [first-sample last-sample]
+  (val-diff last-sample first-sample))
+
+;;; MemoryMXBean
+
+(defn- memory-usage
+  [^MemoryUsage usage]
+  {:committed (.getCommitted usage)
+   :init (.getInit usage)
+   :max (.getMax usage)
+   :used (.getUsed usage)})
+
+(defn- memory-usage-change
+  [^MemoryUsage first-usage ^MemoryUsage last-usage]
+  {:committed (- (.getCommitted last-usage) (.getCommitted first-usage))
+   :used (- (.getUsed last-usage) (.getUsed first-usage))})
+
+(let [mem-bean (.. ManagementFactory getMemoryMXBean)]
+  (defn finalization-sample
+    ^long []
+    (. mem-bean getObjectPendingFinalizationCount))
+
+  (defn memory-sample
+    []
+    (let [heap (. mem-bean getHeapMemoryUsage)
+          non-heap (. mem-bean getNonHeapMemoryUsage)]
+      {:heap heap
+       :non-heap non-heap}))
+
+  (defn set-memory-verbose!
+    "Set whether the memory collection system emits verbose output."
+    [flag]
+    (. mem-bean setVerbose flag)))
+
+(defn memory
+  "Return a map of the memory usage for the JVM instance."
+  [memory-sample]
+  (let [res (util/update-vals memory-sample memory-usage)
+        total (util/sum (:heap res) (:non-heap res))]
+    (assoc res :total total)))
+
+(defn memory-change
+  "Return a map of the change in memory usage between first and last samples."
+  [first-memory-sample last-memory-sample]
+  (let [heap (memory-usage-change
+              (:heap first-memory-sample)
+              (:heap last-memory-sample))
+        non-heap (memory-usage-change
+                  (:non-heap first-memory-sample)
+                  (:non-heap last-memory-sample))]
+    {:heap heap
+     :non-heap non-heap
+     :total (val-sum heap non-heap)}))
+
+(defn finalization
+  [finalization-sample]
+  {:pending finalization-sample})
+
+(defn finalization-change
+  [^long first-sample ^long last-sample]
+  {:pending (- last-sample first-sample)})
+
+;;; CompilationMXBean
+
+(let [bean (.. ManagementFactory getCompilationMXBean)]
+  (if (. bean isCompilationTimeMonitoringSupported)
+    (defn compilation-sample
+      ^long []
+      (. bean getTotalCompilationTime))
+    (defn compilation-sample
+      ^long []
+      -1))
+
+  (defn jit-name
+    "Returns the name of the JIT compiler."
+    []
+    (. bean getName)))
+
+(defn compilation
+  [sample]
+  {:time-ms sample})
+
+(defn compilation-change
+  [^long first-sample ^long last-sample]
+  {:time-ms (- last-sample first-sample)})
+
+;;; MemoryPoolMXBeans
+
+(def memory-pool-beans (vec (.. ManagementFactory getMemoryPoolMXBeans)))
+
+(defn memory-pool-names
+  []
+  (mapv #(.getName ^MemoryPoolMXBean %) memory-pool-beans))
+
+(def memory-pool-keywords
+  (mapv name->keyword (memory-pool-names)))
+
+(defmacro memory-pool-beans-samples
+  []
+  `[~@(for [i (range (count memory-pool-beans))]
+       `(.getUsage ^MemoryPoolMXBean (memory-pool-beans ~i)))])
+
+(defn memory-pools-sample
+  []
+  (memory-pool-beans-samples))
+
+(defn memory-pools
+  [sample]
+  (let [res (zipmap memory-pool-keywords (map memory-usage sample))]
+    (assoc res :total (reduce val-sum (vals res)))))
+
+(defn memory-pools-change
+  [first-sample last-sample]
+  (let [change (mapv memory-usage-change first-sample last-sample)
+        res (zipmap memory-pool-keywords change)]
+    (assoc res :total (reduce val-sum (vals res)))))
+
+;;; GarbageCollectorMXBeans
+
+(def garbage-collector-beans
+  (vec (.. 
ManagementFactory getGarbageCollectorMXBeans))) + +(defmacro garbage-collector-bean-samples + [] + `[~@(for [i (range (count garbage-collector-beans))] + `(garbage-collector-bean-sample + (garbage-collector-beans ~i)))]) + +(defn garbage-collector-names + [] + (mapv #(.getName ^GarbageCollectorMXBean %) garbage-collector-beans)) + +(def garbage-collector-keywords + (mapv name->keyword (garbage-collector-names))) + +(defn- garbage-collector-bean-sample + [^GarbageCollectorMXBean bean] + {:count (.getCollectionCount bean) + :time-ms (.getCollectionTime bean)}) + +(defn garbage-collector-sample + [] + (garbage-collector-bean-samples)) + +(defn garbage-collector + [sample] + (assoc + (zipmap garbage-collector-keywords sample) + :total (reduce val-sum sample))) + +(defn garbage-collector-change + [first-sample last-sample] + (let [changes (mapv val-diff last-sample first-sample)] + (assoc + (zipmap garbage-collector-keywords changes) + :total (reduce val-sum changes)))) + +;;; ThreadMXBean + +(defn current-thread-id + ^long [] + (.getId (Thread/currentThread))) + +(let [bean (.. ManagementFactory getThreadMXBean)] + + (defn thread-ids + [] + (vec (.getAllThreadIds bean))) + + (defn thread-info-map + [^ThreadInfo thread-info] + {:blocked-count (.getBlockedCount thread-info) + :blocked-time-ms (.getBlockedTime thread-info) + :waited-count (.getWaitedCount thread-info) + :waited-time-ms (.getWaitedTime thread-info)}) + + (defn thread-cpu-time ^long [^long id] + (.getThreadCpuTime bean id)) + + (defn thread-user-time ^long [^long id] + (.getThreadUserTime bean id)) + + (defn thread-allocated-bytes + ^long [^long id] + (.getThreadAllocatedBytes ^com.sun.management.ThreadMXBean bean id)) + + (defn thread-sample + [^long id] + {:thread-info (.getThreadInfo bean id) + :cpu-time (.getThreadCpuTime bean id) + :user-time (.getThreadUserTime bean id) + :allocated (.getThreadAllocatedBytes + ^com.sun.management.ThreadMXBean bean + id)}) + + #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} + (defn threads-summary-sample + [] + {:count (.getThreadCount bean) + :daemon-count (.getDaemonThreadCount bean) + :started-count (.getTotalStartedThreadCount bean)}) + + (defn set-thread-contention-monitoring-enabled [flag] + (.setThreadContentionMonitoringEnabled bean flag)) + + (defn set-thread-cpu-time-enabled [flag] + (.setThreadCpuTimeEnabled bean flag))) + +(defn thread + [sample] + (update sample :thread-info thread-info-map)) + +(defn thread-change + [first-sample last-sample] + (assoc + (val-diff (dissoc last-sample :thread-info) + (dissoc first-sample :thread-info)) + :thread-info (val-diff (thread-info-map (:thread-info last-sample)) + (thread-info-map (:thread-info first-sample))))) + +;;; OperatingSysteMXBean + +(let [bean (.. ManagementFactory getOperatingSystemMXBean)] + (defn os-details + "Return the operating system details as a hash." + [] + {:arch (. bean getArch) + :available-processors (. bean getAvailableProcessors) + :name (. bean getName) + :version (. bean getVersion)})) + +;;; RuntimeMXBean + +(let [bean (.. ManagementFactory getRuntimeMXBean) + props (. bean getSystemProperties)] + + (defn runtime-details + [] + {:input-arguments (. bean getInputArguments) + :name (. bean getName) + :spec-name (. bean getSpecName) + :spec-vendor (. bean getSpecVendor) + :spec-version (. bean getSpecVersion) + :vm-name (. bean getVmName) + :vm-vendor (. bean getVmVendor) + :vm-version (. 
bean getVmVersion) + :java-version (get props "java.version") + :java-runtime-version (get props "java.runtime.version") + :sun-arch-data-model (get props "sun.arch.data.model") + :clojure-version-string (clojure-version) + :clojure-version *clojure-version*}) + + (defn system-properties + [] + (. bean getSystemProperties))) diff --git a/bases/criterium/src/criterium/measured.clj b/bases/criterium/src/criterium/measured.clj new file mode 100644 index 0000000..1c9ec15 --- /dev/null +++ b/bases/criterium/src/criterium/measured.clj @@ -0,0 +1,105 @@ +(ns criterium.measured + "Implements the concept of a measured function for benchmarking. + + Criterium's metric collection works on a Measured instance. A Measured + represents a benchmarkable unit of code, consisting of: + - A function to execute and measure + - An arguments generator to prevent constant folding + - Optional symbolic representation for debugging + + The Measured implements a timed, batch invocation interface that: + - Supports multiple evaluations per timing sample for fast expressions + - Guarantees zero garbage allocation during measurement + - Prevents constant folding optimization of inputs + + While Criterium automatically creates Measured instances for expressions, + you can also construct custom ones for special measurement needs." + (:require + [criterium.measured.impl :as impl])) + +;;; Measured type + +(defn measured? + "Predicate for x being a Measured." + [x] + (impl/measured? x)) + +(defn measured + "Return a Measured for a function that can be benchmarked. + + The Measured is the basic unit of measurement. A Measured consists of + an arguments generation funtion and a function to be measured. The + function to be measured takes the result of calling the arrguments + function and an eval-count as arguments, ie. `(f (args-fn) + eval-count)`, and returns an `[elapsed-time expr-value]` tuple. + + The arguments function is used to prevent constant folding for + constant inputs. + + The eval-count allows the function to time multiple evaluations of the + subject expression, so that we can measure expressions that are faster + than the timer granularity. + + expr-fn, if specified, returns a symbolic representation of the measured, + for inspection purposes (unused internally)." + ^criterium.measured.impl.Measured + [args-fn f & [expr-fn]] + (impl/measured args-fn f expr-fn)) + +(defn args + "Generate the input state for a measured. + + Returns a value suitable for passing to the measured's function. + The returned value is created fresh each time to prevent constant folding + optimizations." + [measured] + ((:args-fn measured))) + +(defn invoke + "Invoke the given Measured. + + Calls the Measured's function with the given state and eval-count. + The state can be created with `args`. Returns a tuple of [elapsed-time expr-value] + where: + - elapsed-time is a long representing the execution time in nanoseconds + - expr-value is the result of evaluating the measured expression + + Guarantees zero garbage allocation during measurement." + [measured state eval-count] + ;; NOTE eval-count is explicitly not tagged as 'long, since this function is + ;; invoked non-literally, so the calling value will always be an object. + ((:f measured) state eval-count)) + +(defn ^:no-doc symbolic + "Return a symbolic representation of the measured. + + Used for debugging and introspection to understand how the measured will + be executed. Returns nil if no symbolic representation was provided." 
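+  ;; A hand-rolled Measured, as a hedged sketch (the numbers and the
+  ;; nanoTime-based timing are illustrative, not criterium's internals).
+  ;; The args-fn result is passed to f together with an eval-count, and f
+  ;; returns an [elapsed-time expr-value] tuple, as described above.
+  ;;
+  ;;   (def add-measured
+  ;;     (measured (fn [] [40 2])
+  ;;               (fn [[a b] ^long n]
+  ;;                 (let [start (System/nanoTime)]
+  ;;                   (dotimes [_ n] (+ a b))
+  ;;                   [(- (System/nanoTime) start) (+ a b)]))))
+  ;;
+  ;;   (invoke add-measured (args add-measured) 1000)
+  ;;   ;; => [elapsed-ns 42]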
+ [measured] + (when-let [expr-fn (:expr-fn measured)] + (expr-fn))) + +;;; Build a Measured from an expression + +(defmacro expr + "Return a Measured for the given expression. + + The expression is wrapped in a function. + + When the expression is a list form, it is treated as a function call. + The function arguments are treated as constant expressions and are + hoisted into a state function. The result of the state function is a + vector that is passed to the function wrapper as a vector and + destructured." + ([expr] + (impl/measured-expr* expr nil)) + ([expr options] + (impl/measured-expr* expr options))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmacro callable + "Return a Measured for the given no arg function." + ([f] + (impl/measured-callable f)) + ([sf f] + (impl/measured-callable sf f))) diff --git a/bases/criterium/src/criterium/measured/impl.clj b/bases/criterium/src/criterium/measured/impl.clj new file mode 100644 index 0000000..b7a0417 --- /dev/null +++ b/bases/criterium/src/criterium/measured/impl.clj @@ -0,0 +1,253 @@ +(ns criterium.measured.impl + (:require + [criterium.jvm :as jvm] + [criterium.util.blackhole :as blackhole] + [criterium.util.helpers :as util])) + +(defrecord Measured + [^clojure.lang.IFn args-fn + ^clojure.lang.IFn f + expr-fn]) + +(alter-meta! #'->Measured assoc :private true) +(alter-meta! #'map->Measured assoc :private true) + +(defn measured? + "Predicate for x being a Measured." + [x] + (instance? Measured x)) + +(defn measured + "Return a Measured for a function that can be benchmarked. + + The Measured is the basic unit of measurement. A Measured consists of + a state generation funtion and a function to be measured. The + function to be measured takes the result of calling the state function + and an eval-count as arguments, ie. `(f (args-fn) eval-count)`, and + returns an `[elapsed-time expr-value]` tuple. + + The state function is used to prevent constant folding for constant + inputs. + + The eval-count allows usage where the function is a wrapper that + evaluates the subject expression multiple times. + + expr-fn, if specified, returns a symbolic representation of the measured, + for inspection purposes (unused internally). + " + ^Measured + [args-fn + f + & [expr-fn]] + (->Measured args-fn f expr-fn)) + +(defn- s-expression? + "Predicate for expr being an S-expression." + [expr] + (or (list? expr) (instance? clojure.lang.Cons expr))) + +(defrecord FnCallExpr + ;; a representation of an s-expression + [op ; the operand + arg-syms ; arguments as symbols + arg-vals ; the symbolic value of of the arg-syms + metamap ; metadata on the FnCallExpr + ]) + +(defn- fn-call-expr? + "Predicate for x being an instance of a FnCallExpr" + [x] + (instance? FnCallExpr x)) + +(defn- gen-arg-sym + "Generate a symbol for an argument." + [] + (gensym "arg")) + +(defn ^:no-doc form-print + "Return a symbolic expression for the argument." + [x] + (cond + (symbol? x) x + + (fn-call-expr? x) + (with-meta + `(~(:op x) ~@(mapv form-print (:arg-syms x))) + (:metamap x)))) + +(defn ^:no-doc factor-form + "Factor form, extracting constant expressions." + [form] + (let [subj (first form) + tform (fn [x] + (if (and (s-expression? x) (= subj (first x))) + (reduce + (fn [res arg] + (if (fn-call-expr? 
arg) + (-> res + (update :arg-syms conj arg) + (update :arg-vals merge (:arg-vals arg))) + (let [arg-sym (gen-arg-sym)] + (-> res + (update :arg-syms conj arg-sym) + (update :arg-vals assoc arg-sym arg))))) + (->FnCallExpr + (first x) + [] + {} + (meta x)) + (rest x)) + x)) + res (util/postwalk + tform + form)] + {:expr (form-print res) + :arg-vals (:arg-vals res)})) + +(defn ^:no-doc factor-const [expr] + (let [arg-sym (gen-arg-sym)] + {:expr arg-sym + :arg-vals {arg-sym expr}})) + +(defn ^:no-doc factor-expr [expr] + (if (s-expression? expr) + (factor-form expr) + (factor-const expr))) + +(defn ^:no-doc cast-fn + "Return a cast function givent a tag." + [tag] + (when (and (symbol? tag) + (#{'long 'int 'double 'float} tag)) + tag)) + +(defn ^:no-doc binding-with-hint-or-cast + "Return a binding pair to type hint or cast values." + [arg-sym arg-meta] + (let [tag (:tag arg-meta)] + (if-let [f (cast-fn tag)] + [arg-sym (list f arg-sym)] + [(with-meta arg-sym arg-meta) arg-sym]))) + +(defn ^:internal ^:no-doc measured-expr-fn + "Construct a function expression to measure the given expr. + Captures the expression arguments into a state function." + [arg-syms expr {:keys [arg-metas time-fn]}] + (let [blackhole-sym (with-meta (gensym "blachole") + {:tag 'org.openjdk.jmh.infra.Blackhole}) + eval-count-sym (gensym "eval-count") + time-fn (when time-fn + (with-meta + time-fn + {:tag 'clojure.lang.IFn$L}))] + `(fn ~'measured + [~arg-syms + ;; explicitly not tagged as 'long, since this function is invoked + ;; non-literally, so the calling value will always be an object. + ~eval-count-sym] + (let [~blackhole-sym blackhole/blackhole ; hoist cast lookup out of loop + ~@(mapcat binding-with-hint-or-cast arg-syms arg-metas) + ;; primitive loop coounter. Decrement since we evaluate + ;; once outside the loop. + ~(with-meta eval-count-sym {:tag 'long}) + ~eval-count-sym ; convert to 'long + n# (long (unchecked-dec ~eval-count-sym)) + start# ~(if time-fn + `(. ~time-fn invokePrim) + `(jvm/timestamp)) + val# ~expr] ; evaluate once to get a return value + (loop [i# n#] + (when (pos? i#) + ;; don't use a local inside the loop, to avoid locals clearing + (.consume ~blackhole-sym ~expr) + (recur (long (unchecked-dec i#))))) + (let [finish# ~(if time-fn + `(. ~time-fn invokePrim) + `(jvm/timestamp))] + (blackhole/evaporate) + [(unchecked-subtract finish# start#) val#]))))) + +(defn ^:no-doc merge-metas + "Merge two sequences of maps. + Sequences may be of differing lengths. The returned length is the + largest of the two input lengths." + [m1 m2] + (let [l1 (count m1) + l2 (count m2)] + (into (mapv merge m1 m2) + (if (>= l1 l2) + (drop l2 m1) + (drop l1 m2))))) + +(def ^:private TYPE-NAME-CONVERSIONS + ;; converting is good for measureds that use these values directly + ;; but causes wrapping if the values are returned from the measured. + {'java.lang.Long 'long + 'java.lang.Integer 'int + 'java.lang.Double 'double + 'java.lang.Float 'float}) + +(defn ^:no-doc type-name-conversion [t] + (TYPE-NAME-CONVERSIONS t t)) + +(defn ^:no-doc tag-meta [^Class t] + (when t + (let [type-name (-> (.getCanonicalName ^Class t) + symbol + type-name-conversion)] + {:tag type-name}))) + +(defn ^:no-doc capture-arg-types + "Use eval to get types of the arg expressions. + Return a sequence of metadata maps with :tag tupe hints." + [arg-exprs] + (let [types (mapv (comp type eval) arg-exprs)] + (mapv tag-meta types))) + +(defn measured-expr* + "Return a measured function for the given expression. 
+ + The arguments are converted into a vector, which is used as an + argument to the a function that wraps the expression. + + Any expr that is not a List is treated as a constant. This is mainly + for internal benchmarking." + [expr options] + (let [{:keys [expr arg-vals] :as _f} (factor-expr expr) + arg-metas (capture-arg-types (vals arg-vals)) + options (update + options + :arg-metas merge-metas arg-metas)] + `(measured + (fn ~'measured-args [] ~(vec (vals arg-vals))) + ~(measured-expr-fn + (vec (keys arg-vals)) + expr + options) + (fn ~'measured-expr [] + ~(list 'quote + `(do (let [~@arg-vals] + (time ~expr)))))))) + +(defn measured-callable + ([f] + `(measured + (fn ~'measured-args []) + ~(measured-expr-fn + [] + `(~f) + {}) + (fn ~'measured-expr [] + ~(list 'quote + `(time (~f)))))) + ([args-f f] + (let [args (gensym "args")] + `(measured + (fn ~'measured-args [] (~args-f)) + ~(measured-expr-fn + [args] + `(apply ~f [~args]) + {}) + (fn ~'measured-expr [] + ~(list 'quote + `(time (~f)))))))) diff --git a/bases/criterium/src/criterium/metric.clj b/bases/criterium/src/criterium/metric.clj new file mode 100644 index 0000000..2ab4ca4 --- /dev/null +++ b/bases/criterium/src/criterium/metric.clj @@ -0,0 +1,194 @@ +(ns criterium.metric + "Functions for working with metric configurations and definitions. + + A metric represents a measurable value that can be collected during + benchmarking. Each metric is described by a configuration map with + the following structure: + + {:type keyword ; The type of metric (e.g., :timing, :memory) + :name string ; Human readable name of the metric + :values [...] ; Collection of metric value configurations} + + Metrics can be organized in groups using a metrics configuration map: + {:group-name {:values [...]} ; Direct metric values + :other-group {:groups {:subgroup {:values [...]}}} ; Nested metric groups} + + This namespace provides functions for querying and filtering metric + configurations. It supports both flat and hierarchical metric + organization structures." + (:require + [clojure.set :as set] + [criterium.util.helpers :as util] + [criterium.util.invariant :as invariant :refer [have?]])) + +(def ^:private metric-keys + #{:path :dimension :scale :label}) + +(defn metric-config? + [x] + (and (map? x) + (or (set/subset? metric-keys (set (keys x))) + (throw + (invariant/assertion-error + "Invalid keys" + {:error-tupe ::invalid-map-keys + :date {:expected metric-keys + :actual (keys x) + :missing (set/difference + metric-keys + (set (keys x)))}}))))) + +(defn metric-configs + "Returns a sequence of metric-config maps from a metrics configuration map. + + Takes a map of metric groups where each group contains a :values key + with a sequence of metric configurations. Flattens all metric configs + into a single sequence. + + Example input: + {:timing {:values [{:type :timing, :name \"execution-time\"}]} + :memory {:values [{:type :memory, :name \"heap-usage\"}]}} + + Returns: + [{:type :timing, :name \"execution-time\"} + {:type :memory, :name \"heap-usage\"}]" + [metric-defs] + {:post [(have? #(every? metric-config? %) %)]} + (mapcat :values (vals metric-defs))) + +(defn all-metric-configs + "Return all metric configurations from both flat and nested structures. + + Similar to metric-configs but also handles nested metric + groups. Processes both direct :values entries and nested :groups + configurations recursively. 
+ + Example input: + {:timing {:values [{:type :timing, :name \"basic-timing\"}]} + :memory {:groups + {:heap {:values [{:type :memory, :name \"heap-used\"}]} + :non-heap {:values [{:type :memory, :name \"metaspace\"}]}}}} + + Return flat sequence of all metric configurations regardless of + nesting." + [metric-defs] + {:post [(have? #(every? metric-config? %) %)]} + (reduce-kv + (fn [res _k metric-group] + (reduce + conj + res + (or (:values metric-group) + (mapcat :values (vals (:groups metric-group)))))) + [] + metric-defs)) + +(defn select-metrics + [metrics-defs metric-ids] + {:pre [(have? map? metrics-defs)]} + (if metric-ids + (select-keys metrics-defs metric-ids) + metrics-defs)) + +(defn metrics-of-type + "Returns a map of metric configurations filtered by type and optional IDs. + + Given a metrics configuration map, returns configurations matching the + specified metric-type. If metric-ids is provided, only returns metrics + with matching IDs. + + Parameters: + metrics-config - Map of metric configurations + + metric-type - Keyword identifying the type of metrics to + select (e.g., :timing) + + metric-ids - Optional sequence of metric IDs to filter by. If nil, + returns all metrics of the specified type. + + Example: + (metrics-of-type config :timing [:exec-time :wait-time]) + + Return only :timing metrics with the specified IDs." + [metrics-config metric-type metric-ids] + (->> + (select-metrics metrics-config metric-ids) + (util/filter-map #(= metric-type (:type %))))) + +(defn metric-configs-of-type + "Return a sequence of metric configurations filtered by type and optional IDs. + + Similar to metrics-of-type but returns a flat sequence of + configurations instead of a map. Useful when you need to process all + matching configurations sequentially. + + Parameters: + metrics-defs - Map of metric configurations + metric-type - Keyword identifying the type of metrics to select + metric-ids - Optional sequence of metric IDs to filter by + + Example: + (metric-configs-of-type config :memory nil) + ; Returns sequence of all memory metric configurations" + [metrics-defs metric-type metric-ids] + (->> (metrics-of-type metrics-defs metric-type metric-ids) + metric-configs + (filterv #(= metric-type (:type %))))) + +;;; Sample Metric Accessors + +(defn elapsed-time + "Return the elapsed time in nanoseconds from a sample map. + + Parameters: + sample - A map containing benchmark sample data with :elapsed-time key + + Return: + long - The elapsed time in nanoseconds + + Note: This function assumes the sample contains an :elapsed-time value + measured in nanoseconds." + ^long [sample] + (:elapsed-time sample)) + + + + +;;;; + + + +(defn filter-metric-values + "Filter a sequence of metric value maps using predicate" + [pred values] + (filterv pred values)) + +(declare filter-metrics) + +(defn filter-metrics* + "Filter metrics tree keeping values matching predicate. + Preserves structure while only keeping values that match the predicate. + When filtering groups, removes empty groups after filtering." 
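+  ;; Usage sketch; the metric definitions below are invented for illustration
+  ;; and are not criterium's actual metric configuration.
+  ;;
+  ;;   (filter-metrics
+  ;;    {:time   {:values [{:path [:elapsed-time] :dimension :time
+  ;;                        :scale 1e-9 :label "Elapsed" :type :quantitative}]}
+  ;;     :memory {:values [{:path [:allocated] :dimension :memory
+  ;;                        :scale 1 :label "Alloc" :type :event}]}}
+  ;;    (dimension-pred :time))
+  ;;   ;; => {:time {:values [...]}}  ; groups left with no values are dropped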
+ [pred metrics] + (cond-> metrics + (:values metrics) + (update :values #(filterv pred %)) + + (:groups metrics) + (update :groups (fn [g] (filter-metrics g pred))))) + +(defn filter-metrics + [metrics pred] + (->> + (update-vals metrics (partial filter-metrics* pred)) + (util/filter-map #(or (seq (:values %)) (seq (:groups %)))))) + +(defn dimension-pred + "Create predicate that matches metric values with given dimension" + [dim] + #(= dim (:dimension %))) + +(defn type-pred + "Create predicate that matches metric values with given type" + [typ] + #(= typ (:type %))) diff --git a/bases/criterium/src/criterium/platform.clj b/bases/criterium/src/criterium/platform.clj new file mode 100644 index 0000000..1cc8418 --- /dev/null +++ b/bases/criterium/src/criterium/platform.clj @@ -0,0 +1,262 @@ +(ns criterium.platform + "Platform characterisation" + (:require + [clojure.pprint :as pp] + [criterium.analyse] + [criterium.bench :as bench] + [criterium.collect-plan.config :as collect-plan-config] + [criterium.collector :as collector] + [criterium.jvm :as jvm] + [criterium.measured :as measured] + [criterium.collect :as collect]) + (:gen-class)) + +(def ^:private benchmark + {:analyse [:transform-log + [:quantiles {:quantiles [0.9 0.99 0.99]}] + :outliers + [:stats {:samples-id :log-samples}] + :event-stats] + :view []}) + +(def ^:private collection + {:collect-plan + (collect-plan-config/collect-plan-config + :with-jit-warmup + {:batch-time-ns 100000 + :num-measure-samples 500}) + :collector-config + {:stages [:measured-args :compilation :garbage-collector], + :terminator :elapsed-time} + :return-value [::nil] + :viewer :none}) + +;;; nanoTime latency +(def ^:private timestamp-measured + (measured/expr (jvm/timestamp))) + +(defn nanotime-latency + ;; this takes a while for the timestamp capture to synch with the change in + ;; the timestamp. Ideally we would throw away the first half of the samples. + ([] (nanotime-latency {})) + ([options] + (bench/bench-measured + (merge + benchmark + {:collect-plan + (collect-plan-config/collect-plan-config + :with-jit-warmup + {:batch-time-ns 100000 + :num-measure-samples 1000 + :limit-time-ns 20000000000}) + :collector-config + {:stages [:measured-args :compilation :garbage-collector], + :terminator :elapsed-time} + :return-value [::nil] + :viewer :none} + options) + timestamp-measured))) + +;;; nanoTime granularity + +(defn- nanotime-granularity-fn + [_ ^long eval-count] + ;; this takes a while for the timestamp capture to synch with the change in + ;; the timestamp. Ideally we would throw away the first half of the samples. + (let [start (jvm/timestamp) + ^long finish (loop [n eval-count + t start] + (let [t1 (jvm/timestamp)] + (if (= t t1) + (recur n t1) + (if (pos? 
n)
+                             (recur (unchecked-dec n) t1)
+                             t1))))
+        delta (unchecked-subtract finish start)]
+    [delta (long (/ delta eval-count))]))
+
+(def nanotime-granularity-measured
+  (let [args-fn (fn granularity-args-fn [] nil)]
+    (measured/measured
+     args-fn
+     nanotime-granularity-fn)))
+
+(defn nanotime-granularity
+  ([] (nanotime-granularity {}))
+  ([options]
+   (bench/bench-measured
+    (merge
+     benchmark
+     {:collect-plan
+      (collect-plan-config/collect-plan-config
+       :with-jit-warmup
+       {:batch-time-ns 100000
+        :num-measure-samples 500})
+      :collector-config
+      {:stages [:measured-args :compilation :garbage-collector],
+       :terminator :elapsed-time}
+      :return-value [::nil]
+      :viewer :none}
+     options)
+    nanotime-granularity-measured)))
+
+;;; Minimum measured time
+
+(defn constant-long
+  ([] (constant-long {}))
+  ([options]
+   (bench/bench-measured
+    (merge
+     {:viewer :none}
+     benchmark
+     collection
+     options)
+    (measured/expr 1))))
+
+(defn constant-double
+  ([] (constant-double {}))
+  ([options]
+   (bench/bench-measured
+    (merge
+     {:viewer :none}
+     benchmark
+     collection
+     options)
+    (measured/expr 1.0))))
+
+(defn constant-object
+  ([] (constant-object {}))
+  ([options]
+   (bench/bench-measured
+    (merge
+     {:viewer :none}
+     benchmark
+     collection
+     options)
+    (measured/expr {}))))
+
+(defn constant-nil
+  ([] (constant-nil {}))
+  ([options]
+   (bench/bench-measured
+    (merge
+     {:viewer :none}
+     benchmark
+     collection
+     options)
+    (measured/expr nil))))
+
+(defn- find-jit-threasholds [measured collector]
+  (loop [i 0
+         res []
+         comp (jvm/compilation-sample)
+         t 0]
+    (if (< i 4000000)
+      (let [sample (collector/collect
+                    collector measured (measured/args measured) 1)
+            comp2 (jvm/compilation-sample)
+            comp-delta (jvm/compilation-change comp comp2)]
+        (when (= 0 (mod i 10000))
+          ;; pause to let compilation catch up/complete
+          (Thread/sleep 50))
+        (recur
+         (unchecked-inc i)
+         (if (pos? (long (:time-ms comp-delta)))
+           (conj res [i (:time-ms comp-delta)])
+           res)
+         comp2
+         (unchecked-add t (long (:elapsed-time sample)))))
+      [res t])))
+
+;;; JIT compilation thresholds
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn jit-threasholds
+  "Estimate how many iterations are required for JIT compilation.
+  This is not very accurate, as JIT runs in the background, and there
+  are several compilation targets.
+
+  The highest value returned should be less than TARGET-WARMUP-SAMPLES for
+  our collection plans to be realistic."
+  ([] (jit-threasholds {}))
+  ([_options]
+   (let [collector (collector/collector {:stages [] :terminator :elapsed-time})
+         measured (measured/expr 1)
+         [res _t] (find-jit-threasholds measured collector)]
+     res)))
+
+;; (jit-threasholds)
+
+;;; platform description
+
+(defn- mean-elapsed-time [result]
+  (-> result :elapsed-time :mean))
+
+(defn- min-elapsed-time [result]
+  (-> result :elapsed-time :min-val))
+
+(defn platform-stats
+  "Return a sequence of estimates for times that describe accuracy of timing.
+
+  Each estimate has a :name value."
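+  ;; REPL sketch summarising the checks above; the exact shape of each result
+  ;; depends on the benchmark configuration and is only indicated here.
+  ;;
+  ;;   (nanotime-latency)      ; cost of reading the timestamp
+  ;;   (nanotime-granularity)  ; smallest observable timestamp increment
+  ;;   (constant-long)         ; measurement floor for a constant expression
+  ;;   (platform-stats)        ; all of the above, each tagged with a :name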
+  ([] (platform-stats {}))
+  ([options]
+   (let [options (merge
+                  options
+                  {:return-value [:stats :stats]})]
+     [(assoc (nanotime-latency options) :name "latency")
+      (assoc (nanotime-granularity options) :name "granularity")
+      (assoc (constant-long options) :name "constant-long")
+      (assoc (constant-double options) :name "constant-double")
+      (assoc (constant-object options) :name "constant-object")
+      (assoc (constant-nil options) :name "constant-nil")])))
+
+#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
+(defn platform-point-estimates
+  "Return estimates for times that describe the accuracy of timing.
+
+  The latency and granularity are min estimates, and the rest are
+  mean estimates."
+  ([] (platform-point-estimates {}))
+  ([options]
+   (let [stats (platform-stats options)
+         point-estimate {:latency min-elapsed-time
+                         :granularity min-elapsed-time
+                         :constant-long mean-elapsed-time
+                         :constant-double mean-elapsed-time
+                         :constant-object mean-elapsed-time
+                         :constant-nil mean-elapsed-time}]
+     (reduce
+      (fn [res stat]
+        (let [kw (keyword (:name stat))]
+          (assoc res kw ((point-estimate kw) stat))))
+      {}
+      stats))))
+
+(defn exec-main
+  "Output a table of the platform min and mean point estimates.
+  Compatible with deps.edn :exec-fn."
+  [_opts]
+  (pp/pprint (jvm/os-details))
+  (pp/pprint (select-keys (jvm/runtime-details)
+                          [:vm-version :vm-name :vm-vendor
+                           :clojure-version-string]))
+  (let [stats
+        (reduce
+         (fn [res stat]
+           (let [view {:name (:name stat)
+                       :min-ns (min-elapsed-time stat)
+                       :mean-ns (mean-elapsed-time stat)}]
+             (conj res view)))
+         []
+         (platform-stats))]
+    (pp/print-table stats)
+    (println)
+    (println "JIT compilation thresholds: " (jit-threasholds))
+    (println)))
+
+(defn -main
+  "Output a table of the platform min and mean point estimates.
+  Compatible with deps.edn :main."
+  []
+  (exec-main {}))
diff --git a/bases/criterium/src/criterium/sampled_fn.clj b/bases/criterium/src/criterium/sampled_fn.clj
new file mode 100644
index 0000000..445ffde
--- /dev/null
+++ b/bases/criterium/src/criterium/sampled_fn.clj
@@ -0,0 +1,175 @@
+(ns criterium.sampled-fn
+  "First-class function instrumentation for aggregated sampling.
+
+  Provides functionality for wrapping functions with instrumentation code
+  that collects performance data during execution. The instrumented functions
+  are first-class objects that maintain their own sample collection state."
+  (:require
+   [criterium.collect-plan :as collect-plan]
+   [criterium.collector :as collector]
+   [criterium.jvm :as jvm]
+   [criterium.measured :as measured]
+   [criterium.sampler :as sampler]
+   [criterium.util.invariant :refer [have]]
+   [criterium.util.t-digest :as t-digest]
+   [criterium.metric :as metric])
+  (:import
+   [java.util.concurrent Callable]))
+
+(defn- measured [original-fn]
+  (measured/measured
+   (fn state-f [] (assert false))
+   (fn measured-f [args ^long eval-count]
+     (have (partial = 1) eval-count)
+     (let [start (jvm/timestamp)
+           res (apply original-fn args)
+           finish (jvm/timestamp)]
+       [(unchecked-subtract finish start) res]))))
+
+(defmacro ^:private invoke-f
+  [collector measured metric-keys args]
+  `(let [sample# (collector/collect ~collector ~measured ~args 1)]
+     (set! ~'digests (reduce
+                      (fn [ds# k#]
+                        (update ds# k# t-digest/add-point (get-in sample# k#)))
+                      ~'digests
+                      ~metric-keys))
+     (:expr-value sample#)))
+
+(defn- sample-map
+  "Convert t-digests into an analyzable sample map structure.
+ + Takes collected samples and metrics configurations and produces a map + in the format expected by criterium's analysis functions." + [metrics-defs digests] + {:type :criterium/digest + :metric->digest (update-vals digests t-digest/compress) + :transform collect-plan/identity-transforms + :metrics-defs metrics-defs + :expr-value nil + :source-id nil}) + +(deftype SampledFn + [^clojure.lang.IFn original-fn + metric-keys + collector + measured + ^:volatile-mutable digests] + + sampler/Sampler + (samples-map [_] (sample-map (:metrics-defs collector) digests)) + (reset-samples! [_] (set! digests (zipmap metric-keys (t-digest/new-digest)))) + + clojure.lang.IFn + (invoke [this] + (invoke-f collector measured metric-keys [])) + (invoke [this a1] + (invoke-f collector measured metric-keys [a1])) + (invoke [this a1 a2] + (invoke-f collector measured metric-keys [a1 a2])) + (invoke [this a1 a2 a3] + (invoke-f collector measured metric-keys [a1 a2 a3])) + (invoke [this a1 a2 a3 a4] + (invoke-f collector measured metric-keys [a1 a2 a3 a4])) + (invoke [this a1 a2 a3 a4 a5] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5])) + (invoke [this a1 a2 a3 a4 a5 a6] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6])) + (invoke [this a1 a2 a3 a4 a5 a6 a7] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7 a8])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7 a8 a9])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13] + (invoke-f collector measured metric-keys [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17])) + (invoke [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18])) + (invoke + [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19])) + (invoke + [this a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19 a20] + (invoke-f + collector + measured + metric-keys + [a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16 a17 a18 a19 a20])) + (applyTo [this args] + (invoke-f collector measured metric-keys args)) + + Runnable + 
(run [this] + (invoke-f collector measured metric-keys [])) + + Callable + (call [this] + (invoke-f collector measured metric-keys []))) + +(defn sample-fn + "Create an sample wrapper of function f. + + Takes a function and a collector configuration and returns a new + function that wraps the original while collecting timing samples + during execution. The returned function implements IFn, Runnable, and + Callable interfaces. + + Parameters: + f - The function to instrument + collector-config - A collector configuration that defines how samples + are processed + + Returns: + An InstrumentedFn instance that wraps the original function and maintains + its own sample collection state." + [f collector-config] + (let [collector (collector/collector collector-config) + metrics-defs (-> (:metrics-defs collector) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + metric-keys (mapv :path metric-configs)] + (->SampledFn + f + metric-keys + collector + (measured f) + (zipmap metric-keys (repeat (t-digest/new-digest)))))) diff --git a/bases/criterium/src/criterium/sampler.clj b/bases/criterium/src/criterium/sampler.clj new file mode 100644 index 0000000..79b44e9 --- /dev/null +++ b/bases/criterium/src/criterium/sampler.clj @@ -0,0 +1,11 @@ +(ns criterium.sampler + "Protocol and utilities for working with performance sampling state. + + Provides a standard interface for components that collect and store + performance metrics samples during execution.") + + +(defprotocol Sampler + "Protocol for accessing sampling state" + (samples-map [this] "Get the current samples collected") + (reset-samples! [this] "Clear all collected samples")) diff --git a/bases/criterium/src/criterium/trigger.clj b/bases/criterium/src/criterium/trigger.clj new file mode 100644 index 0000000..8618ea6 --- /dev/null +++ b/bases/criterium/src/criterium/trigger.clj @@ -0,0 +1,50 @@ +(ns criterium.trigger + "Provide a trigger for collecting elapsed time samples between trigger events. + + The trigger maintains internal state about when it was last triggered + and collects samples of elapsed time between trigger + events. + + Typical usage: + + (let [t (trigger)] + (fire! t) ;; Start timing + (do-something) + (fire! t) ;; Record elapsed time + (do-something-else) + (fire! t) ;; Record another sample + (let [samples (sampler/samples-map t)] ;; Get samples and reset + (analyze-samples samples)))" + (:require + [criterium.trigger.impl :as impl])) + +(defn trigger + "Creates and returns a new trigger sampler. + + The returned trigger maintains state about: + - When it was last triggered (timestamp) + - A collection of timing samples between trigger events + + The trigger starts with no samples and must be fired at least twice + to collect timing data." + [] + (impl/trigger)) + +(defn fire! + "Records a timing event in the trigger sampler. + + When fired: + - Records current timestamp + - If this isn't the first firing, calculates elapsed time since last firing + - Adds the elapsed time and any extra-data as a new sample + + The first firing only records the initial timestamp - no sample is collected + until the second firing. + + Parameters: + trigger - The trigger instance to fire + extra-data - Optional map of additional data to attach to the sample" + ([trigger] + (impl/fire! trigger nil)) + ([trigger extra-data] + (impl/fire! 
trigger extra-data))) diff --git a/bases/criterium/src/criterium/trigger/impl.clj b/bases/criterium/src/criterium/trigger/impl.clj new file mode 100644 index 0000000..333f5b1 --- /dev/null +++ b/bases/criterium/src/criterium/trigger/impl.clj @@ -0,0 +1,58 @@ +(ns criterium.trigger.impl + (:require + [criterium.collect :as collect] + [criterium.collect-plan :as collect-plan] + [criterium.collector :as collector] + [criterium.jvm :as jvm] + [criterium.sampler :as sampler] + [criterium.types :as types] + [criterium.util.invariant :refer [have?]])) + +(defn samples->samples-map + [samples] + {:post [(have? types/metrics-samples-map? %)]} + (let [collector (collector/collector + {:terminator :elapsed-time})] + {:type :criterium/metrics-samples + :batch-size 1 + :eval-count (count samples) + :metrics-defs (:metrics-defs collector) + :metric->values (collect/sample-maps->map-of-samples + samples + (:metrics-defs collector)) + :transform collect-plan/identity-transforms + :num-samples (count samples) + :source-id nil + :expr-value nil})) + +(defrecord TriggerData + [^long last-triggered + samples]) + +(defrecord Trigger + [state] + + sampler/Sampler + (samples-map [_] + (samples->samples-map (:samples @state))) + (reset-samples! [_] + (reset! state (->TriggerData 0 [])) + nil)) + +(defn fire! [^Trigger trigger extra-data] + (swap! (:state trigger) + (fn [^TriggerData trigger-data] + (let [prev-time (.last-triggered trigger-data) + this-time (jvm/timestamp)] + (->TriggerData + this-time + (if (zero? prev-time) + (:samples trigger-data) + (conj + (:samples trigger-data) + (merge + {:elapsed-time (unchecked-subtract this-time prev-time)} + extra-data)))))))) + +(defn trigger [] + (->Trigger (atom (->TriggerData 0 [])))) diff --git a/bases/criterium/src/criterium/types.clj b/bases/criterium/src/criterium/types.clj new file mode 100644 index 0000000..f564cff --- /dev/null +++ b/bases/criterium/src/criterium/types.clj @@ -0,0 +1,189 @@ +(ns criterium.types + "Type predicates and accessors for benchmarking data structures" + (:require + [clojure.set :as set] + [criterium.util.invariant :as invariant :refer [have?]])) + +;;; Type predicates + +(defn collection-map? + "Check if x is a collection map with required keys." + [x] + (and (map? x) + (set/subset? + #{:eval-count + :elapsed-time + :collections + :num-samples + :batch-size + :collector} + (set (keys x))))) + +(defn data-entry-map? + [x] + (and (map? x) + (set/subset? #{:type :transform} (set (keys x))))) + +(defn collected-metrics-map? + [x] + (and (map? x) + (set/subset? + #{:type + :transform + :metric->values + :elapsed-time + :num-samples + :batch-size + :eval-count + :metrics-defs + :expr-value} + (set (keys x))))) + +(def metrics-samples-keys + #{:type + :transform + :source-id + :metric->values + :num-samples + :batch-size + :metrics-defs + :expr-value }) + + +(defn metrics-samples-map? + [x] + (and (map? x) + (or + (= :criterium/metrics-samples (:type x)) + (throw + (invariant/assertion-error + "Invalid tupe" + {:error-tupe ::invalid-type + :date {:expected :criterium/metrics-samples + :actual (:type x)}}))) + (or + (set/subset? 
metrics-samples-keys (set (keys x))) + (throw + (invariant/assertion-error + "Invalid keys" + {:error-tupe ::invalid-map-keys + :date {:expected metrics-samples-keys + :actual (keys x) + :missing (set/difference + metrics-samples-keys + (set (keys x)))}}))))) + +(def digest-samples-keys + #{:type + :transform + :source-id + :metric->digest + :num-samples + :batch-size + :metrics-defs + :expr-value }) + +(defn digest-samples-map? + [x] + (and (map? x) + (or + (= :criterium/digest (:type x)) + (throw + (invariant/assertion-error + "Invalid tupe" + {:error-tupe ::invalid-type + :date {:expected :criterium/digest + :actual (:type x)}}))) + (or + (set/subset? digest-samples-keys (set (keys x))) + (throw + (invariant/assertion-error + "Invalid keys" + {:error-tupe ::invalid-map-keys + :date {:expected digest-samples-keys + :actual (keys x) + :missing (set/difference + digest-samples-keys + (set (keys x)))}}))))) + +(defn generic-metrics-samples-map? + [x] + (#{:criterium/metrics-samples + :criterium/collected-metrics-samples + :criterium/digest} + (:type x))) + +(def quantiles-map-keys #{:type :quantiles :metrics-defs :source-id}) + +(defn quantiles-map? + [x] + (and (map? x) + (= :criterium/quantiles (:type x)) + (set/subset? quantiles-map-keys (set (keys x))))) + +(def outliers-map-keys + #{:type :outliers :metrics-defs :source-id :quantiles-id :num-samples + :transform}) + +(defn outliers-map? + [x] + (and (map? x) + (= :criterium/outliers (:type x)) + (or + (set/subset? outliers-map-keys (set (keys x))) + (throw + (invariant/assertion-error + "Invalid keys" + {:error-tupe ::invalid-map-keys + :date {:expected outliers-map-keys + :actual (keys x) + :missing (set/difference + outliers-map-keys + (set (keys x)))}}))))) + +(def stats-map-keys + #{:type :stats :metrics-defs :transform :batch-size :source-id + :outliers-id}) + +(defn stats-map? + [x] + (and (map? x) + (= :criterium/stats (:type x)) + (set/subset? stats-map-keys (set (keys x))))) + +(def event-stats-map-keys + #{:type :event-stats :metrics-defs :transform :batch-size :source-id}) + +(defn event-stats-map? + [x] + (and (map? x) + (= :criterium/event-stats (:type x)) + (set/subset? event-stats-map-keys (set (keys x))))) + +(def outlier-significance-map-keys + #{:type :outlier-significance :metrics-defs :source-id :outliers-id}) + +(defn outlier-significance-map? + [x] + (and (map? x) + (= :criterium/outlier-significance (:type x)) + (set/subset? outlier-significance-map-keys (set (keys x))))) + +(def bootstrap-map-keys + #{:type :bootstrap :metrics-defs :transform :batch-size :source-id}) + +(defn bootstrap-map? + [x] + (and (map? x) + (= :criterium/bootstrap (:type x)) + (set/subset? bootstrap-map-keys (set (keys x))))) + +(defn result-map? + [x] + (and (map? x) + (every? data-entry-map? (vals x)))) + +(defn benchmark-map? + [x] + (and (map? x) + (result-map? (:data x)))) diff --git a/bases/criterium/src/criterium/util/blackhole.clj b/bases/criterium/src/criterium/util/blackhole.clj new file mode 100644 index 0000000..9c40308 --- /dev/null +++ b/bases/criterium/src/criterium/util/blackhole.clj @@ -0,0 +1,15 @@ +(ns criterium.util.blackhole + "JMH blackhole wrapper." + (:import + [org.openjdk.jmh.infra Blackhole])) + +(def ^{:tag 'org.openjdk.jmh.infra.Blackhole} blackhole + (Blackhole. + "Today's password is swordfish. I understand instantiating Blackholes directly is dangerous.")) + +(defn evaporate + "Evaporate the blackhole, releasing any references it may contain." 
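+  ;; Sketch of the intended pattern (the loop shape is illustrative): consume
+  ;; each iteration's value into the shared Blackhole so the JIT cannot
+  ;; eliminate the expression as dead code, then evaporate once afterwards.
+  ;;
+  ;;   (dotimes [_ 1000]
+  ;;     (.consume blackhole (+ 1 2)))
+  ;;   (evaporate)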
+ [] + (.evaporate + blackhole + "Yes, I am Stephen Hawking, and know a thing or two about black holes.")) diff --git a/bases/criterium/src/criterium/util/bootstrap.clj b/bases/criterium/src/criterium/util/bootstrap.clj new file mode 100644 index 0000000..f45d783 --- /dev/null +++ b/bases/criterium/src/criterium/util/bootstrap.clj @@ -0,0 +1,232 @@ +(ns criterium.util.bootstrap + "Bootsrap statistics" + (:require + [criterium.collect-plan :as collect-plan] + [criterium.metric :as metric] + [criterium.util.helpers :as util] + [criterium.util.probability :as probability] + [criterium.util.stats :as stats] + [criterium.util.well :as well])) + +(defn bootstrap-sample + "Bootstrap sampling of a statistic, using resampling with replacement." + [data statistic size rng-factory] + (assert (nat-int? size)) + (stats/transpose + (for [_ (range size)] (statistic (sort (stats/sample data (rng-factory))))))) + +(defn bootstrap-estimate + "Mean, variance and confidence interval. This uses the bootstrapped + statistic's variance for the confidence interval, but we should use BCa of + ABC." + [sampled-stat] + (let [n (count sampled-stat) + m (stats/mean sampled-stat n) + v (stats/variance* sampled-stat m n) + ;; stats ((juxt mean variance) sampled-stat) + stats [m v]] + (conj stats + (apply stats/confidence-interval stats)))) + +(defn scale-bootstrap-estimate [estimate ^double scale] + [(* ^double (:point-estimate estimate) scale) + (map #(* scale ^double (:value %1)) (:estimate-quantiles estimate))]) + +(defn drop-at [n coll] + (lazy-seq + (when-let [s (seq coll)] + (concat (take n s) (next (drop n s)))))) + +(defn jacknife + "Jacknife statistics on data." + [data statistic] + (stats/transpose + (map #(statistic (drop-at %1 data)) (range (count data))))) + +(defn bca-nonparametric-eval + "Calculate bootstrap values for given estimate and samples" + [size z-alpha estimate samples jack-samples] + {:pre [(> (count jack-samples) 1)]} + (let [z0 (probability/normal-quantile + (/ (count (filter (partial > estimate) samples)) + ^long size)) + jack-mean (stats/mean jack-samples) + jack-deviation (map #(- jack-mean ^double %1) jack-samples) + ^double sqr-deviation (reduce + 0.0 (map util/sqrd jack-deviation)) + acc (if (zero? sqr-deviation) + Double/POSITIVE_INFINITY + (/ ^double (reduce + + 0.0 + (map util/cubed jack-deviation)) + (* 6.0 (Math/pow sqr-deviation 1.5)))) + tt (map + (fn [^double x] + (probability/normal-cdf + (+ z0 (/ (+ z0 x) (- 1.0 (* acc (+ z0 x))))))) + z-alpha) + ooo (map + (fn [^double x] (util/trunc (* x ^long size))) + tt) + sorted-samples (sort samples) + confpoints (map (partial nth sorted-samples) ooo)] + [confpoints z0 acc jack-mean jack-samples])) + +(defn bca-nonparametric + "Non-parametric BCa estimate of a statistic on data. Size bootstrap samples + are used. Confidence values are returned at the alpha normal + quantiles. rng-factory is a method that returns a random number generator to + use for the sampling. + + An introduction to the bootstrap. Efron, B., & Tibshirani, R. J. (1993). + + See http://lib.stat.cmu.edu/S/bootstrap.funs for Efron's original + implementation." + [data statistic size alpha rng-factory] + (assert (nat-int? size)) + (let [data (sort data) + estimate (statistic data) + samples (bootstrap-sample data statistic size rng-factory) + jack-samples (jacknife data statistic) + alpha (if (vector? alpha) alpha [alpha]) + z-alpha (map probability/normal-quantile alpha)] + (if (vector? 
estimate) + (map + (partial bca-nonparametric-eval size z-alpha) + estimate samples jack-samples) + (bca-nonparametric-eval size z-alpha estimate samples jack-samples)))) + +(defrecord BcaEstimate + [point-estimate + estimate-quantiles]) + +(defn- bca-to-estimate + [alpha bca-estimate] + (assert (= 0.5 (first alpha)) alpha) + (->BcaEstimate + (first (first bca-estimate)) + (mapv + (fn [value z] {:value value :alpha z}) + (next (first bca-estimate)) + (next alpha)))) + +(defn bootstrap-bca + "Bootstrap a statistic. Statistic can produce multiple statistics as a vector + so you can use juxt to pass multiple statistics. + http://en.wikipedia.org/wiki/Bootstrapping_(statistics)" + [data statistic size alpha rng-factory] + (assert (nat-int? size)) + (let [bca (bca-nonparametric data statistic size alpha rng-factory)] + (if (vector? bca) + (bca-to-estimate alpha bca) + (map (partial bca-to-estimate alpha) bca)))) + +(defn bootstrap + "Bootstrap a statistic. Statistic can produce multiple statistics as a vector + so you can use juxt to pass multiple statistics. + http://en.wikipedia.org/wiki/Bootstrapping_(statistics)" + [data statistic size rng-factory] + (let [samples (bootstrap-sample data statistic size rng-factory)] + (if (vector? (first samples)) + (map bootstrap-estimate samples) + (bootstrap-estimate samples)))) + +(defn- scale-bootstrap-stat [scale-f stat] + (-> stat + (update :point-estimate scale-f) + (update :estimate-quantiles + #(mapv (fn [q] (update q :value scale-f)) %)))) + +(defn assoc-bootstrap-mean-3-sigma + [{:keys [mean variance] :as stats}] + (let [three-sigma (* 3 (Math/sqrt (:point-estimate variance))) + mean-plus-3sigma (+ ^double (:point-estimate mean) three-sigma) + mean-minus-3sigma (- ^double (:point-estimate mean) three-sigma)] + (assoc stats + :mean-plus-3sigma {:point-estimate mean-plus-3sigma} + :mean-minus-3sigma {:point-estimate mean-minus-3sigma}))) + +(defn scale-bootstrap-values + [stats f] + (util/update-vals stats f)) + +(def stats-fn-map + {:mean stats/mean + :variance stats/variance + :min-val stats/min + :max-val stats/max}) + +(defn stats-fns + [quantiles] + (into + (vec (vals stats-fn-map)) + (map #(partial stats/quantile %) quantiles))) + +(defn stats-fn + [fs] + (fn [vs] + (mapv #(% vs) fs))) + +(defn bootstrap-stats-for + [samples opts transforms] + {:pre [(:quantiles opts) + (:estimate-quantiles opts)]} + (let [vs (mapv double samples) + quantiles (into [0.25 0.5 0.75] (:quantiles opts)) + stats-fn (stats-fn (stats-fns quantiles)) + stats (bootstrap-bca + vs + stats-fn + (:bootstrap-size opts (long (* (count vs) 0.8))) + (into [0.5] (:estimate-quantiles opts)) + well/well-rng-1024a) + scale-1 (fn [v] (util/transform-sample-> v transforms)) + scale-f (partial scale-bootstrap-stat scale-1) + ks (keys stats-fn-map)] + (-> (zipmap ks stats) + (assoc-bootstrap-mean-3-sigma) + (scale-bootstrap-values scale-f) + (assoc :quantiles + (zipmap quantiles (map scale-f (drop (count ks) stats))))))) + +(defn bootstrap-stats* + [metric->values metric-configs transforms config] + (reduce + (fn [res path] + (assoc-in + res path + (bootstrap-stats-for + (get metric->values path) + config + transforms))) + {} + (map :path metric-configs))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn bootstrap-stats + ;; add stats to the result + ([] (bootstrap-stats {})) + ([{:keys [id metric-ids samples-id] :as analysis}] + (fn [data-map] + (let [id (or id :bootstrap-stats) + samples-id (or samples-id :samples) + metrics-samples (data-map samples-id) + 
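+           ;; For reference, each metric handled by bootstrap-stats-for
+           ;; above reduces to a call of roughly this shape (the size and
+           ;; quantile values here are illustrative, not defaults):
+           ;;   (bootstrap-bca (mapv double samples)
+           ;;                  (stats-fn (stats-fns [0.25 0.5 0.75]))
+           ;;                  1000
+           ;;                  [0.5 0.025 0.975]
+           ;;                  well/well-rng-1024a)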
metrics-defs (-> (:metrics-defs metrics-samples) + (metric/select-metrics metric-ids) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map samples-id) + result (bootstrap-stats* + (util/metric->values metrics-samples) + metric-configs + transforms + analysis)] + (assoc + data-map + id + {:type :criterium/bootstrap + :bootstrap result + :metrics-defs metrics-defs + :transform collect-plan/identity-transforms + :batch-size (:batch-size metrics-samples) + :source-id samples-id}))))) diff --git a/bases/criterium/src/criterium/util/debug.clj b/bases/criterium/src/criterium/util/debug.clj new file mode 100644 index 0000000..df3f90f --- /dev/null +++ b/bases/criterium/src/criterium/util/debug.clj @@ -0,0 +1,14 @@ +(ns criterium.util.debug) + +(def _debug (volatile! nil)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn debug! + ([] + (debug! true)) + ([flag] + (vreset! _debug flag))) + +(defn dtap> [x] + (when @_debug + (tap> x))) diff --git a/bases/criterium/src/criterium/util/format.clj b/bases/criterium/src/criterium/util/format.clj new file mode 100644 index 0000000..1744fcc --- /dev/null +++ b/bases/criterium/src/criterium/util/format.clj @@ -0,0 +1,162 @@ +(ns criterium.util.format + "Metric formatters") + +(defn round + "Round number to specified significant figures. + + Parameters: + n - Number to round + sig-figs - Number of significant figures to keep + + Returns: + Number rounded to specified significant figures" + [^double n ^long sig-figs] + (let [magnitude (Math/floor (Math/log10 (Math/abs n))) + scale (- sig-figs magnitude 1)] + (/ (Math/round (* n (Math/pow 10 scale))) + (Math/pow 10 scale)))) + +(defmulti scale + "Scale value with given dimensions keyword. + Return a [scale units] tuple. + scale is a multiplicative factor. units is a string." + #_{:clj-kondo/ignore [:unused-binding]} + (fn [dimension value] dimension)) + + +(defmethod scale :default + [_ _value] + [1 ""]) + +(defmethod scale :time ; seconds + [_ value] + (cond + (> (double value) 60) [(/ 60) "min"] + (< (double value) 1e-6) [1e9 "ns"] + (< (double value) 1e-3) [1e6 "µs"] + (< (double value) 1) [1e3 "ms"] + :else [1 "s"])) + +(def ^:const ONE-KB 1024) +(def ^:const ONE-MB (* 1024 1024)) +(def ^:const ONE-GB (* 1024 1024 1024)) + +(defmethod scale :memory + [_ value] + (cond + (< (long value) ONE-KB) [1 "bytes"] + (< (long value) ONE-MB) [(/ ONE-KB) "Kb"] + (< (long value) ONE-GB) [(/ ONE-MB) "Mb"] + :else [(/ ONE-GB) "Gb"])) + +(defn format-scaled + ([value scale] + (format "%3.3g" (double (* (double scale) (double value))))) + ([value scale unit] + (format "%3.3g %s" (double (* (double scale) (double value))) unit))) + +(defmulti format-value* + "Format value to 3 significant figures in an appropriate unit for the scale." + #_{:clj-kondo/ignore [:unused-binding]} + (fn [dimension value opts] dimension)) + +(defn format-value + "Format in an appropriate unit and precision for the scale." 
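+  ;; Illustrative results (exact formatting may vary slightly by locale):
+  ;;   (format-value :time 1.23e-4)  ; ~ "123 µs"
+  ;;   (format-value :memory 2048)   ; ~ "2.00 Kb"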
+ ([dimension value] + (format-value* dimension value {:sf 3})) + ([dimension value opts] + (format-value* dimension value (merge {:sf 3} opts)))) + +(defmethod format-value* :default + [_ value opts] + (format "%s" (str value))) + +(defn- double-format-str [sf] + (case (long (or sf 0)) + 3 "%3.3g" + 4 "%4.4g" + "%3.3g")) + +(defmethod format-value* :time + [dimension value opts] + (let [[scale unit] (scale dimension value)] + (format + (str (double-format-str (:sf opts)) " %s") + (double (* (double scale) (double value))) + unit))) + +(defmethod format-value* :memory + [dimension value opts] + (let [[scale unit] (scale dimension value)] + (format + (str (double-format-str (:sf opts)) " %s") + (double (* (double scale) (double value))) + unit))) + +(defmulti format-metric + #_{:clj-kondo/ignore [:unused-binding]} + (fn [metric val] metric)) + +(defmethod format-metric :elapsed-time + [_ val] + (let [v (/ (double val) 1e9)] + (format "%32s: %s\n" "Elapsed time" (format-value :time v)))) + +(defn- format-count-time [[k {c :count t :time}]] + (format "%36s: count %d time %s\n" (name k) c (format-value :time t))) + +(defmethod format-metric :garbage-collector + [_ val] + (format "%32s:\n%s" "Garbage collection" + (apply str (map format-count-time val)))) + +(defmethod format-metric :finalization + [_ val] + (format "%32s: %d\n" "Pending finalisations" (:pending val))) + +(defn- format-memory-metrics [[k vs]] + (apply + str + (format "%36s:\n" (name k)) + (for [[mk v] vs] + (format "%40s: %s\n" (name mk) (format-value :memory v))))) + +(defmethod format-metric :memory + [_ val] + (format "%32s:\n%s" "Memory" + (apply str (map format-memory-metrics val)))) + +(defn- format-runtime-memory-metrics [[k v]] + (format "%36s: %s\n" (name k) (format-value :memory v))) + +(defmethod format-metric :runtime-memory + [_ val] + (format "%32s:\n%s" "Runtime Memory" + (apply str (map format-runtime-memory-metrics val)))) + +(defmethod format-metric :compilation + [_ val] + (let [v (:time-ms val)] + (format "%32s: %s\n" "JIT Compilation time" (format-value :time-ms v)))) + +(defn format-count [[k v]] + (format "%36s: %d\n" (name k) v)) + +(defmethod format-metric :class-loader + [_ val] + (apply + str + (format "%32s:\n" "Classloader") + (map format-count val))) + +(defmethod format-metric :state + [_ _] + "") + +(defmethod format-metric :expr-value + [_ _] + "") + +(defmethod format-metric :num-evals + [_ _] + "") diff --git a/bases/criterium/src/criterium/util/forms.clj b/bases/criterium/src/criterium/util/forms.clj new file mode 100644 index 0000000..abfe09b --- /dev/null +++ b/bases/criterium/src/criterium/util/forms.clj @@ -0,0 +1,33 @@ +(ns criterium.util.forms) + +(defmacro cond* + "A cond variant that allows :let bindings visible to subsequent clauses. + + Example: + (cond* + (foo?) (handle-foo) + :let [a 5] + (> a 3) (handle-big a) + :let [b (+ a 1)] + (bar? b) (handle-bar b) + :else (default-handler a b))" + {:style/indent 1} + [& clauses] + (letfn [(process [clauses] + (when (seq clauses) + (let [guard (first clauses) + remaining (next clauses) + body (first remaining)] + (when-not remaining + (throw + (IllegalArgumentException. + "cond* requires an even number of forms"))) + (if (= :let guard) + (if (vector? body) + `(let ~body + ~(process (next remaining))) + (throw + (IllegalArgumentException. 
+ "cond* :let requires a binding vector"))) + `(if ~guard ~body ~(process (next remaining)))))))] + (process clauses))) diff --git a/bases/criterium/src/criterium/util/helpers.clj b/bases/criterium/src/criterium/util/helpers.clj new file mode 100644 index 0000000..6d5e6db --- /dev/null +++ b/bases/criterium/src/criterium/util/helpers.clj @@ -0,0 +1,303 @@ +(ns criterium.util.helpers + (:refer-clojure :exclude [update-vals]) + (:require + [criterium.types :as types] + [criterium.util.invariant :as invariant :refer [have have?]])) + + +(defn assoc-tag [sym t] + (vary-meta sym assoc :tag t)) + +(defn spy + [msg x] + (prn msg x) + x) + +(defn- safe-keys + [m] + (assert (or (map? m) (nil? m)) (pr-str m)) + {:pre [(or (map? m) (nil? m))]} + (dissoc m :state :expr-value)) + +(defn- merge-fn [op] + (fn merge-fn-inner [a b] + (if (or (map? a) (map? b)) + (merge-with + merge-fn-inner + (safe-keys a) + (safe-keys b)) + (op a b)))) + +(defn sum + ([] {}) + ([a b] + (merge-with + (merge-fn +) + (safe-keys a) + (safe-keys b)))) + +(defmacro sqr + "Square of argument" + [x] `(let [x# ~x] (* x# x#))) + +(defn sqrd + "Square of argument" + ^double [^double x] (* x x)) + +(defn cubed + "Cube of argument" + ^double [^double x] + (* x x x)) + +(defn trunc + "Round towards zero to an integeral value." + [^double x] + (if (pos? x) + (Math/floor x) + (Math/ceil x))) + +;; from clojure 1.11-aplha-2 +(def update-vals-impl + '(with-meta + (persistent! + (reduce-kv (fn [acc k v] (assoc! acc k (f v))) + (if (instance? clojure.lang.IEditableCollection m) + (transient m) + (transient {})) + m)) + (meta m))) + +(defmacro provide-update-vals + [] + (if (resolve 'clojure.core/update-vals) + '(clojure.core/update-vals m f) + update-vals-impl)) + +#_{:clj-kondo/ignore [:redefined-var :unused-binding]} +(defn update-vals + "m f => {k (f v) ...} + + Given a map m and a function f of 1-argument, returns a new map where + the keys of m are mapped to result of applying f to the corresponding + values of m." + [m f] + (provide-update-vals)) + +(defn filter-map + "Internal helper to filter map entries based on a predicate applied to values. + + Return a new map containing only the entries where (pred value) + returns true. Used internally for filtering metrics by their + configuration values." + [pred m] + (into {} (filter (comp pred val)) m)) + +(defn reduce-double-vector + "Reduce a double primitive value over a vector." + ^double [^clojure.lang.IFn$DOD f + ^double init + ^clojure.lang.APersistentVector v] + (let [n (.count v)] + (loop [acc init + i 0] + (if (< i n) + (recur (.invokePrim f acc (.nth v i)) (unchecked-inc i)) + acc)))) + +;; Modified version of clojure.walk to preserve metadata +(defn walk + "Traverses form, an arbitrary data structure. inner and outer are + functions. Applies inner to each element of form, building up a + data structure of the same type, then applies outer to the result. + Recognizes all Clojure data structures. Consumes seqs as with doall." + + {:added "1.1"} + [inner outer form] + (cond + (list? form) + (outer (with-meta + (apply list (map inner form)) + (meta form))) + + (instance? clojure.lang.IMapEntry form) + (outer + (clojure.lang.MapEntry/create + (inner (key form)) (inner (val form)))) + + (seq? form) + (outer (with-meta + (doall (map inner form)) + (meta form))) + + (instance? clojure.lang.IRecord form) + (outer (reduce (fn [r x] (conj r (inner x))) form form)) + + (coll? 
form) + (outer (with-meta + (into (empty form) (map inner form)) + (meta form))) + :else (outer form))) + +(defn postwalk + "Performs a depth-first, post-order traversal of form. Calls f on + each sub-form, uses f's return value in place of the original. + Recognizes all Clojure data structures. Consumes seqs as with doall." + {:added "1.1"} + [f form] + (walk (partial postwalk f) f form)) + +(defn deep-merge + "Merge maps recursively." + [& ms] + (letfn [(merge* [& xs] + (if (some #(and (map? %) (not (record? %))) xs) + (apply merge-with merge* xs) + (last xs)))] + (reduce merge* ms))) + +(defn report + "Print format output" + [format-string & values] + (print (apply format format-string values))) + +;;; Accessors + +(defn data-entry-map + [data type transform source-id] + {:data data + :type type + :transform transform + :source-id source-id}) + +(defn data + [data-entry-map] + {:pre [(have? :data data-entry-map)] + :post [(have? map? %)]} + (:data data-entry-map)) + +(defn metric->values + [metrics-samples] + {:pre [(have? types/generic-metrics-samples-map? metrics-samples)] + :post [(have? map? %)]} + (:metric->values metrics-samples)) + +(defn metric->digest + [digest-samples] + (:metric->digest digest-samples)) + +(defn quantiles + [quantiles-map] + {:pre [(have? types/quantiles-map? quantiles-map)] + :post [(have? map? %)]} + (:quantiles quantiles-map)) + +(defn outliers + [outliers-map] + {:pre [(have? types/outliers-map? outliers-map)] + :post [(have? map? %)]} + (:outliers outliers-map)) + +(defn outlier-significance + [outlier-significance-map] + {:pre [(have? types/outlier-significance-map? outlier-significance-map)] + :post [(have? map? %)]} + (:outlier-significance outlier-significance-map)) + +(defn stats + [stats-map] + {:pre [(have? types/stats-map? stats-map)] + :post [(have? map? %)]} + (:stats stats-map)) + +(defn event-stats + [event-stats-map] + {:pre [(have? types/event-stats-map? event-stats-map)] + :post [(have? map? %)]} + (:event-stats event-stats-map)) + +(defn bootstrap + [bootstrap-stats-map] + {:pre [(have? types/bootstrap-map? bootstrap-stats-map)] + :post [(have? map? %)]} + (:bootstrap bootstrap-stats-map)) + +;;; Value transforms + +;; These allow sample values to be transformed to mesurements, and vice versa. +;; This enables, using a log-normal transform of the samples. + +(defn add-transform-paths + [v sample-> ->sample] + (-> v + (update :sample-> (fnil conj '()) sample->) + (update :->sample (fnil conj '[]) ->sample))) + +(defn get-transforms + [result-map path] + {:pre [(have? types/result-map? result-map)]} + (loop [transforms (update-vals (:transform (have (result-map path))) vector) + path (:source-id (result-map path))] + (if path + (let [t (:transform (result-map path))] + (recur + (add-transform-paths transforms (:sample-> t) (:->sample t)) + (:source-id (result-map path)))) + transforms))) + +(defn transform-sample-> + ^double [value transforms] + (reduce (fn [v f] (f v)) value (:sample-> transforms))) + +(defn transform-vals-> + [m transforms] + (let [tform #(reduce (fn [v f] (f v)) % (:sample-> transforms))] + (update-vals m tform))) + +(defn transform->sample [value transforms] + (reduce (fn [v f] (f v)) value (reverse (:->sample transforms)))) + +;;; Thread +(defn valid-thread-priority + [p] + (when p + (cond + (= :max-priority p) Thread/MAX_PRIORITY + (= :min-priority p) Thread/MIN_PRIORITY + (and (integer? 
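+      ;; an explicit integer priority is accepted only within the JVM's
+      ;; MIN_PRIORITY..MAX_PRIORITY range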
p) + (<= Thread/MIN_PRIORITY + p + Thread/MAX_PRIORITY)) p + :else + (throw (ex-info "Invalid thread priority" + {:priority p + :min-priority Thread/MIN_PRIORITY + :max-priority Thread/MAX_PRIORITY}))))) + +(defmacro with-thread-priority + [p & body] + `(let [priority# (valid-thread-priority ~p) + orig-priority# (.getPriority (Thread/currentThread))] + (when priority# + (.setPriority (Thread/currentThread) priority#)) + (try + ~@body + (finally + (when priority# + (.setPriority (Thread/currentThread) orig-priority#)))))) + +;;; Resolve + +(defn maybe-ver-get-named + "Resolve and deref a Named, or return the argument." + [k {:keys [default-ns]}] + {:pre [(or (keyword? k) (symbol? k))]} + (if-let [n (namespace k)] + (or (some-> (ns-resolve (symbol n) (symbol (name k))) deref) k) + (or (some-> (ns-resolve default-ns (symbol (name k))) deref) k))) + +(defn maybe-var-get + "Resolve and deref a Named, or return the argument." + [x options] + (if (or (keyword? x) (symbol? x)) + (maybe-ver-get-named x options) + x)) diff --git a/bases/criterium/src/criterium/util/histogram.clj b/bases/criterium/src/criterium/util/histogram.clj new file mode 100644 index 0000000..f76a1b0 --- /dev/null +++ b/bases/criterium/src/criterium/util/histogram.clj @@ -0,0 +1,116 @@ +(ns criterium.util.histogram + "Histogram computation utilities using Freedman-Diaconis rule for binning." + (:require + [clojure.math :as math])) + +(defn- quartiles + "Calculate quartiles Q1 and Q3 from sorted data. + Returns [q1 q3]" + [^doubles sorted-data] + (let [n (alength sorted-data) + q1-idx (quot n 4) + q3-idx (quot (* 3 n) 4)] + [(aget sorted-data q1-idx) + (aget sorted-data q3-idx)])) + +(defn- compute-iqr + "Compute Interquartile Range (IQR) from vector of values" + ^double [values] + (let [sorted (double-array (sort values)) + [q1 q3] (quartiles sorted)] + (- (double q3) (double q1)))) + +(def ^:private ^:const minus-one-third + (/ -1.0 3.0)) + +(defn- compute-bin-width + "Compute bin width using Freedman-Diaconis rule: + width = 2 * IQR * n^(-1/3)" + [values ^double iqr] + (let [n (count values)] + (* 2.0 iqr (math/pow n minus-one-third)))) + +(defn- generate-bins + "Generate bin edges and centers based on data range and bin width" + [^double min-val ^double max-val ^double bin-width] + (let [data-range (- max-val min-val) + ;; Ensure at least 2 bins for distinct values + num-bins (max 2 (int (math/ceil (/ data-range bin-width)))) + adjusted-width (/ data-range num-bins) ; Adjust to exactly cover range + edges (mapv #(+ min-val (* (double %) adjusted-width)) + (range (inc num-bins))) + centers (mapv #(+ min-val (* (+ (double %) 0.5) adjusted-width)) + (range num-bins))] + {:edges edges + :centers centers + :width adjusted-width + :num-bins num-bins})) + +(defn- count-values-in-bins + "Count number of values falling into each bin" + [values edges] + (let [bins (int-array (dec (count edges))) + last-idx (dec (alength bins))] + (doseq [v values] + (loop [idx 0] + (when (< idx (count bins)) + (let [v (double v) + lower (double (nth edges idx)) + upper (double (nth edges (inc idx)))] + (if (or (and (<= lower v) (< v upper)) + (and (= idx last-idx) (<= lower v) (<= v upper))) + (aset bins idx (inc (aget bins idx))) + (when (< idx last-idx) + (recur (inc idx)))))))) + (vec bins))) + +(defn- compute-density + "Compute probability density for each bin" + [counts ^long total-samples] + (mapv #(double (/ (long %) total-samples)) counts)) + +(defn histogram + "Compute histogram from vector of numeric values using Freedman-Diaconis rule. 
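+   Example (illustrative; bin boundaries depend on the input data):
+     (histogram [1.0 2.0 2.5 3.0 10.0])
+     => {:counts [...] :centers [...] :width ... :density [...]
+         :n 5 :num-bins ... :min 1.0 :max 10.0}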
+ Optional pre-computed IQR can be provided. + Returns map containing: + - :counts - vector of bin counts + - :centers - vector of bin centers + - :width - bin width + - :density - vector of probability density values + - :n - total number of samples + - :min - minimum value + - :max - maximum value + + Throws: + - ex-info {:error :histogram/no-values} for empty input + - ex-info {:error :histogram/same-values} when all values are the same" + ([values] + (histogram values nil)) + ([values precomputed-iqr] + (when (empty? values) + (throw (ex-info + "Input vector cannot be empty" + {:error :histogram/no-values}))) + (let [min-val (reduce min values) + max-val (reduce max values)] + (when (= min-val max-val) + (throw (ex-info + "All values are the same - cannot create histogram" + {:error :histogram/same-values + :min-val min-val + :max-val max-val}))) + (let [iqr (or precomputed-iqr (compute-iqr values)) + bin-width (compute-bin-width values iqr) + {:keys [edges centers width num-bins]} + (generate-bins min-val max-val bin-width) + counts (count-values-in-bins values edges) + n (count values) + density (compute-density counts n)] + {:counts counts + :centers centers + :width width + :density density + :n n + :num-bins num-bins + :min min-val + :max max-val})))) diff --git a/bases/criterium/src/criterium/util/invariant.clj b/bases/criterium/src/criterium/util/invariant.clj new file mode 100644 index 0000000..04e951f --- /dev/null +++ b/bases/criterium/src/criterium/util/invariant.clj @@ -0,0 +1,63 @@ +(ns criterium.util.invariant + "Assertion macros inspired by truss.") + +(defn truthy? [x] + (and (some? x) (not (false? x)))) + +(defn assertion-error [msg data] + (let [^Exception ex (ex-info msg data) + trace (into-array StackTraceElement + (drop 2 (.getStackTrace ex)))] + ;; Hack the stack trace so it appears to come from the assertion site + (.setStackTrace ex trace) + (AssertionError. msg ex))) + +(defn- have* [x args truthy? &form] + (let [[f x data] (if (seq args) + (into [x] args) + ['criterium.util.invariant/truthy? x]) + ns-sym (ns-name *ns*) + line (:line (meta &form)) + column (:column (meta &form) -1) + x-sym (gensym "x")] + (when (>(count args) 3) + (throw + (ex-info + "have expects at most three arguments" + {:line line :column column}))) + `(let [~x-sym ~x + v# (~f ~x-sym)] + (when-not v# + (let [msg# (str + "Invariant failed at " + ~(str ns-sym "[" line ":" column "] ") + (list '~f ~x-sym))] + (throw (assertion-error + msg# + {:pred ~(list 'quote f) + :arg {:form ~(list 'quote x) + :value ~x-sym + :type (type ~x-sym)} + :loc {:ns '~ns-sym + :line ~line + :column ~column + :file ~*file*} + :data ~data})))) + ~(if truthy? + true + x-sym)))) + +(defmacro have + "Assertion macro inspired by truss." + {:arglists '[[x][f x][f x data]]} + ([x & args] + (if *assert* + (have* x args (not :truthy) &form) + x))) + +(defmacro have? + "Assertion macro inspired by truss." + ([x & args] + (if *assert* + (have* x args :truthy &form) + x))) diff --git a/bases/criterium/src/criterium/util/kde.clj b/bases/criterium/src/criterium/util/kde.clj new file mode 100644 index 0000000..156209d --- /dev/null +++ b/bases/criterium/src/criterium/util/kde.clj @@ -0,0 +1,53 @@ +(ns criterium.util.kde) + +;; Mellin-Meijer-kernel density estimation on R+. Gery Geenens∗ School of +;; Mathematics and Statistics, UNSW Sydney, Australia July 17, 2017 + +(defn gamma-fn + "Returns Gamma(z + 1 = number) using Lanczos approximation. + Taken from rosettacode." 
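+  ;; Lanczos approximation using the widely used g = 7, nine-coefficient set.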
+ [number] + (if (< number 0.5) + (/ Math/PI (* (Math/sin (* Math/PI number)) + (gamma-fn (- 1 number)))) + (let [n (dec number) + c [0.99999999999980993 676.5203681218851 -1259.1392167224028 + 771.32342877765313 -176.61502916214059 12.507343278686905 + -0.13857109526572012 9.9843695780195716e-6 1.5056327351493116e-7]] + (* (Math/sqrt (* 2 Math/PI)) + (Math/pow (+ n 7 0.5) (+ n 0.5)) + (Math/exp (- (+ n 7 0.5))) + (+ (first c) + (apply + (map-indexed #(/ %2 (+ n %1 1)) (next c)))))))) + +(comment + (gamma-fn 1) ; 1 + (gamma-fn 2) ; 1 + (gamma-fn 3) ; 2 + (gamma-fn 4) ; 6 + (gamma-fn 5) ; 24 + ) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn mellin-transform + "Mellin transform. + + Mellin-Meijer-kernel density estimation on R+. Gery Geenens∗ School of + Mathematics and Statistics, UNSW Sydney, Australia July 17, 2017 + + Eq, 2.19" + [nu gamma zeta theta z] + (let [tan-theta (Math/tan theta) + cos-theta (Math/cos theta) + ;; sin-theta (Math/sin theta) + a1 (/ (* zeta zeta) (* gamma gamma cos-theta cos-theta)) + a2 (/ (* zeta zeta) (* gamma gamma cos-theta cos-theta)) + e1 (+ a1 (* zeta (- z 1))) + e2 (+ a2 (* zeta (- 1 z)))] + (* (Math/pow nu (- z 1)) + (Math/pow (/ 1 (* tan-theta tan-theta)) (* zeta (- z 1))) + (/ (* (gamma-fn e1) (gamma-fn e2)) + (* (gamma-fn a1) (gamma-fn a2)))))) + +;; (mellin-transform 1 1 1 0 1) +;; (mellin-transform 3 1 2 0 1) diff --git a/bases/criterium/src/criterium/util/output.clj b/bases/criterium/src/criterium/util/output.clj new file mode 100644 index 0000000..5d29765 --- /dev/null +++ b/bases/criterium/src/criterium/util/output.clj @@ -0,0 +1,16 @@ +(ns criterium.util.output) + +(def ^:dynamic *report-progress* + "Flag to control output of progress messages" + nil) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn ^:skip-wiki progress + "Conditionally report progress to *out*." + [& message] + (when *report-progress* + (apply println message))) + +(defmacro with-progress-reporting [flag & body] + `(binding [*report-progress* ~flag] + ~@body)) diff --git a/bases/criterium/src/criterium/util/probability.clj b/bases/criterium/src/criterium/util/probability.clj new file mode 100644 index 0000000..700720d --- /dev/null +++ b/bases/criterium/src/criterium/util/probability.clj @@ -0,0 +1,118 @@ +(ns criterium.util.probability + "probability functions") + +(defn polynomial-value + "Evaluate a polynomial at the given value x, for the coefficients given in + descending order (so the last element of coefficients is the constant term)." + ^double [^double x ^doubles coefficients] + (reduce + #(+ (* x ^double %1) ^double %2) + (first coefficients) + (rest coefficients))) + +(def a-coeffs + [1.061405429 -1.453152027 1.421413741 -0.284496736 0.254829592 0.0]) + +(defn erf + "erf polynomial approximation. Maximum error is 1.5e-7. + Handbook of Mathematical Functions: with Formulas, Graphs, and Mathematical + Tables. Milton Abramowitz (Editor), Irene A. Stegun (Editor), 7.1.26" + ^double [^double x] + (let [sign (Math/signum x) + x (Math/abs x) + a a-coeffs + p 0.3275911 + t (/ (+ 1.0 (* p x))) + value (- 1.0 (* (polynomial-value t a) + (Math/exp (- (* x x)))))] + (* sign value))) + +(defn normal-cdf + "Probability p(Xvalues path] + {:pre [(have? map? metric->values)]} + (->> (have seq (metric->values path) + {:path path :metric->values metric->values}) + (filterv some?) + (mapv double))) + +(defn scale-vals [m scale-1] + (util/update-vals m scale-1)) + +(defn quantiles-for + [path samples config] + {:pre [(have? seq path) + (have? 
seq samples) + (have? map? samples)]} + (have :quantiles config) + (have (comp not :tail-quantile) config) + (let [qs (into [0.25 0.5 0.75] (:quantiles config)) + vs (sort (samples-for-path samples path))] + (sample-quantiles qs vs))) + +(defn stats-for + [vs _config] + {:pre [(have? seq vs)]} + (let [vs (sort vs)] + (-> (into {} (stats-fns vs)) + (assoc-mean-3-sigma) + (assoc :n (count vs))))) + +(defn quantiles + [samples metric-configs config] + {:pre [(have? seq samples)]} + (reduce + (fn [res path] + (assoc-in res path (quantiles-for path samples config))) + {} + (map :path metric-configs))) + +(defn sample-stats + [metric->values outliers metric-configs config] + (reduce + (fn [res path] + (let [ols (:outliers (get-in outliers path) {}) + vs (samples-for-path metric->values path) + without-outliers (if ols + (into [] + (comp + (map-indexed (fn [i v] (when-not (ols i) v))) + (filter some?)) + vs) + vs)] + (if (seq vs) + (assoc-in res path (stats-for without-outliers config)) + res))) + {} + (mapv :path metric-configs))) + +(defn event-stats + "Return the stats for events like JIT compilation and garbage-collector." + [metrics-defs samples] + (reduce-kv + (fn [stats _k metric] + (if-let [groups (:groups metric)] + (merge stats (event-stats groups samples)) + (let [ms (:values metric) + all-vs (mapv #(get samples (:path %)) ms) + all-vals (mapv (fn [vs] (reduce + 0 vs)) all-vs) + sample-count (reduce + + + 0 + (apply mapv + (fn [& vs] (if (some pos? vs) 1 0)) + all-vs))] + (merge stats + (-> (zipmap + (map :path ms) + all-vals) + (assoc + (conj (vec (butlast (:path (first ms)))) :sample-count) + sample-count)))))) + {} + metrics-defs)) diff --git a/bases/criterium/src/criterium/util/stats.clj b/bases/criterium/src/criterium/util/stats.clj new file mode 100644 index 0000000..e5ab62c --- /dev/null +++ b/bases/criterium/src/criterium/util/stats.clj @@ -0,0 +1,248 @@ +(ns criterium.util.stats + "A collection of statistical methods used by criterium" + (:refer-clojure :exclude [min max]) + (:require + [criterium.util.helpers :as util])) + +;;; Utilities +(defn transpose + "Transpose a vector of vectors." + [data] + (if (vector? (first data)) + (apply map vector data) + data)) + +;;; Statistics + +(defn min + ([data] + (reduce clojure.core/min data)) + ([data _count] + (reduce clojure.core/min data))) + +(defn max + ([data] + (reduce clojure.core/max data)) + ([data _count] + (reduce clojure.core/max data))) + +(defn unchecked-add-d + ^double [^double a ^double b] + (unchecked-add a b)) + +(defn mean + "Arithmetic mean of data." + (^double [data] + (let [c (count data)] + (when (pos? c) + (/ (double (reduce unchecked-add-d 0.0 data)) c)))) + (^double [data ^long count] + (/ (double (reduce unchecked-add-d 0.0 data)) count))) + +(defn sum + "Sum of each data point." + [data] (reduce + data)) + +(defn sum-of-squares + "Sum of the squares of each data point." + [data] + (reduce + (fn ^double [^double s ^double v] + (+ s (* v v))) 0.0 data)) + +(defn variance* + "variance based on subtracting mean" + ^double [data ^double mean ^long df] + (/ (double + (reduce + (fn ^double [^double a ^double b] + (+ a (util/sqr (- b mean)))) + 0.0 + data)) + df)) + +(defn variance + "Return the variance of data. + + By default returns the sample variance with (- (count data) 1) degrees + of freedom. + + The population variance can be returned using (variance data 0), which uses + (count data) degrees of freedom. + + Ref: Chan et al. Algorithms for computing the sample variance: analysis and + recommendations. 
American Statistician (1983)." + (^double [data] (variance data 1)) + (^double [data ^long df] + ;; Uses a single pass, non-pairwise algorithm, without shifting. + (letfn [(update-estimates [[^double m ^double q ^long k] ^double x] + (let [kp1 (inc k) + delta (- x m)] + [(+ m (/ delta kp1)) + (+ q (/ (* k (util/sqr delta)) kp1)) + kp1]))] + (let [[_ ^double q ^long k] (reduce update-estimates [0.0 0.0 0] data)] + (when (> k df) + ;; (throw (ex-info + ;; "insufficient data to calculate variance" + ;; {:data data})) + + (/ q (- k df))))))) + +;; For the moment we take the easy option of sorting samples +(defn median + "Calculate the median of a sorted data set. + Return [median, [vals less than median] [vals greater than median]] + References: http://en.wikipedia.org/wiki/Median" + [data] + (let [n (count data) + i (bit-shift-right n 1)] + (if (even? n) + [(/ (+ (double (nth data (dec i))) + (double (nth data i))) + 2.0) + (take i data) + (drop i data)] + [(nth data (bit-shift-right n 1)) + (take i data) + (drop (inc i) data)]))) + +(defn quartiles + "Calculate the quartiles of a sorted data set + References: http://en.wikipedia.org/wiki/Quartile" + [data] + (let [[m lower upper] (median data)] + [(first (median lower)) m (first (median upper))])) + +(defn quantile + "Calculate the quantile of a sorted data set + References: http://en.wikipedia.org/wiki/Quantile" + [^double quantile data] + (let [n (dec (count data)) + interp (fn [^double x] + (let [f (Math/floor x) + i (long f) + p (- x f)] + (cond + (zero? p) (nth data i) + (= 1.0 p) (nth data (inc i)) + :else (+ (* p (double (nth data (inc i)))) + (* (- 1.0 p) (double (nth data i)))))))] + (interp (* quantile n)))) + +(defn boxplot-outlier-thresholds + "Outlier thresholds for given quartiles." + [^double q1 ^double q3] + {:pre [(number? q1) (number? q3)]} + (let [iqr (- q3 q1) + severe (* iqr 3.0) + mild (* iqr 1.5)] + [(- q1 severe) + (- q1 mild) + (+ q3 mild) + (+ q3 severe)])) + +(defn uniform-distribution + "Return uniformly distributed deviates on 0..max-val use the specified rng." + [^double max-val rng] + (map (fn [^double x] (* x max-val)) rng)) + +(defn sample-uniform + "Provide n samples from a uniform distribution on 0..max-val" + [n max-val rng] + (take n (uniform-distribution max-val rng))) + +(defn sample + "Sample with replacement." + [x rng] + (let [n (count x)] + (map #(nth x %1) (sample-uniform n n rng)))) + +(defn confidence-interval + "Find the significance of outliers given boostrapped mean and variance + estimates. This uses the bootstrapped statistic's variance, but we should use + BCa of ABC." + [^double mean ^double variance] + (let [n-sigma 1.96 ; use 95% confidence interval + delta (* n-sigma (Math/sqrt variance))] + [(- mean delta) (+ mean delta)])) + +;;; Nonparametric assessment of multimodality for univariate data. +;;; Salgado-Ugarte IH, Shimizu M. 1998 + +;;; Maximum likelihood kernel density estimation: On the potential of convolution sieves. +;;; Jones and Henderson. Computational Statistics and Data Analysis (2009) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn modal-estimation-constant + "Kernel function for estimation of multi-modality. + h-k is the critical bandwidth, sample-variance is the observed sample variance. + Equation 7, Nonparametric assessment of multimodality for univariate + data. 
Salgado-Ugarte IH, Shimizu M" + [^double h-k ^double sample-variance] + (Math/sqrt (+ 1 (/ (util/sqr h-k) sample-variance)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn smoothed-sample + "Smoothed estimation function." + [^double c-k ^double h-k data deviates] + (lazy-seq + (cons + (* c-k (+ ^double (first data) + (* h-k ^double (first deviates)))) + (when-let [n (next data)] + (smoothed-sample c-k h-k n (next deviates)))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn gaussian-weight + "Weight function for gaussian kernel." + [^double t] + (let [k (Math/pow (* 2 Math/PI) -0.5)] + (* k (Math/exp (/ (* t t) -2))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn kernel-density-estimator + "Kernel density estimator for x, given n samples X, weights K and width h." + [h K n X x] + (/ ^double (reduce + (fn ^double [^double a ^double b] + (+ a + ^double (K (/ (- ^double x b) ^double h)))) + 0.0 X) + (* (long n) (double h)))) + +(defn sum-square-delta ^double [vs ^double mv] + (reduce + (map (comp util/sqrd (fn [^double x] (- x mv))) vs))) + +(defn- muld ^double [^double a ^double b] (* a b)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defn linear-regression + [xs ys] + (let [n (count xs) + mx (/ ^double (reduce + xs) n) + my (/ ^double (reduce + ys) n) + nmxmy (* n mx my) + nmxmx (* n mx mx) + s1 (- ^double (reduce + (map muld xs ys)) + nmxmy) + s2 (- ^double (reduce + (map util/sqrd xs)) + nmxmx) + a1 (/ s1 s2) + a0 (- my (* a1 mx)) + f (fn ^double [^double x] (+ a0 (* a1 x))) + pred-ys (mapv f xs) + sqr-residuals (mapv (comp util/sqrd -) ys pred-ys) + ss-residuals (double (reduce + sqr-residuals)) + variance (/ ss-residuals (- n 2)) + ss-total (sum-square-delta ys my) + r-sqr (- 1 (/ ss-residuals ss-total))] + {:coeffs [a0 a1] + :variance variance + :r-sqr r-sqr})) + +;; (= (linear-regression (range 10) (range 10)) [0 1 0 1]) +;; (= (linear-regression (range 10) (range 1 11)) [1 1 0 1]) + +;; (let [[a0 a1] (linear-regression (range 10) (range 1 11))] +;; (+ a0 (* a1 10))) diff --git a/bases/criterium/src/criterium/util/t_digest.clj b/bases/criterium/src/criterium/util/t_digest.clj new file mode 100644 index 0000000..d89fb41 --- /dev/null +++ b/bases/criterium/src/criterium/util/t_digest.clj @@ -0,0 +1,62 @@ +(ns criterium.util.t-digest + (:require + [criterium.util.t-digest.merging-digest :as md])) + +(defn new-digest [] + (md/new-digest)) + +(defn add-point + "Add a single value into the digest" + [digest value] + (md/add-point digest value)) + +(defn compress + "Merge any buffered points into the digest." + [digest] + (md/compress digest)) + +(defn quantile + "Return estimated value at given quantile [0,1]. + Return nil if digest is empty." + ^double [digest ^double x] + (md/quantile digest x)) + +(defn cdf + "Return the cumulative probability at x. + Return NaN if digest is empty." + ^double [digest ^double x] + (md/cdf digest x)) + +(defn sample-count + ^double [digest] + (md/sample-count digest)) + +(defn minimum + ^double [digest] + (md/minimum digest)) + +(defn maximum + ^double [digest] + (md/maximum digest)) + +(defn mean + "Return the mean estimate. + Return NaN if digest is empty." + ^double [digest] + (md/mean digest)) + +(defn variance + "Return the mean estimate. + Return NaN if digest is empty." 
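+  ;; Hedged usage sketch of this wrapper (values are illustrative):
+  ;;   (-> (new-digest)
+  ;;       (add-point 1.0)
+  ;;       (add-point 2.0)
+  ;;       (add-point 3.0)
+  ;;       compress
+  ;;       (quantile 0.5))   ; median estimate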
+ (^double [digest] + (md/variance digest)) + (^double [digest ^double mean] + (md/variance digest mean))) + +(defn transform + [digest f] + (md/transform digest f)) + +(defn centroid-means + [digest] + (md/centroid-means digest)) diff --git a/bases/criterium/src/criterium/util/t_digest/merging_digest.clj b/bases/criterium/src/criterium/util/t_digest/merging_digest.clj new file mode 100644 index 0000000..2a5d8f1 --- /dev/null +++ b/bases/criterium/src/criterium/util/t_digest/merging_digest.clj @@ -0,0 +1,453 @@ +(ns criterium.util.t-digest.merging-digest + "Implementation of the t-digest algorithm for streaming quantile estimation. + Based on the MergingDigest variant from https://github.com/tdunning/t-digest" + (:require + [criterium.util.forms :refer [cond*]] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.util.t-digest.scale :as scale :refer [finite?]]) + (:import + [clojure.lang + IPersistentVector] + [criterium.util.t_digest.scale + Scale])) + +(defrecord Centroid + [^double mean + ^double weight]) + +(defn centroid-weight + ^double [^Centroid centroid] + (.weight centroid)) + +(defn centroid-mean + ^double [^Centroid centroid] + (.mean centroid)) + +(defrecord TDigest + [^double compression + ^IPersistentVector centroids ;existing centroids + ^IPersistentVector temp-centroids ;buffer for new points + ^double total-weight + ^double unmerged-weight + ^double minimum + ^double maximum + ^Scale scale + ^long buffer-size]) + +(def ^:private default-compression 100.0) + +(def ^:private default-buffer-size 128) + +(defn new-digest + "Creates a new t-digest with given compression factor." + ([] (new-digest default-compression)) + ([compression] + (new-digest compression default-buffer-size)) + ([compression buffer-size] + (->TDigest + (double compression) + [] ; centroids + [] ; temp-centroids + 0.0 ; total-weight + 0.0 ; unmerged-weight + Double/NaN ; minimum + Double/NaN ; maximum + scale/k2 + buffer-size))) + +(defn- weighted-average + "Compute weighted average of two points" + ^double [^double x1 ^double w1 ^double x2 ^double w2] + (/ (+ (* x1 w1) (* x2 w2)) + (+ w1 w2))) + +(defn- merge-centroids + "Merges centroids while maintaining t-digest invariants." + [compression sorted-centroids total-weight scale + & {:keys [use-weight-limit] :or {use-weight-limit true}}] + (if (<= (count sorted-centroids) 1) + (vec sorted-centroids) + (let [total-weight (double total-weight) + normalizer (scale/normalizer scale compression total-weight) + k1 (scale/k scale 0.0 normalizer) + w-limit (* total-weight (scale/q scale (+ k1 1.0) normalizer))] + (loop [result [] + w-so-far 0.0 + [^Centroid c1 ^Centroid c2 & more :as _centroids] + sorted-centroids] + (have vector? result) + (if (nil? c2) + (let [final (if c1 (conj result c1) result)] + (have #(= total-weight %) + (reduce + 0.0 (mapv #(.weight ^Centroid %) final))) + #_(have #(= 1.0 %) + (.weight ^Centroid (first final)) + {:final final}) + #_(have #(= 1.0 %) + (.weight ^Centroid (peek final)) + {:final final}) + final) + (let [proposed-weight (+ (.weight c1) (.weight c2)) + add-this? + (cond + (= (count result) 0) ; first iteration + false + (empty? more) ; last pair + false + :else + (let [projected-w (+ w-so-far proposed-weight)] + (if use-weight-limit + (let [q0 (/ w-so-far total-weight) + q2 (/ projected-w total-weight)] + (<= proposed-weight + (* total-weight + (min + (scale/max-size scale q0 normalizer) + (scale/max-size scale q2 normalizer))))) + (<= projected-w w-limit))))] + (if add-this? 
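+                  ;; add-this? is true when c1 and c2 can be combined
+                  ;; without the merged centroid exceeding the scale
+                  ;; function's size limit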
+ ;; merge c2 into c1 + (recur result + w-so-far + (into [(->Centroid + (weighted-average + (.mean c1) (.weight c1) + (.mean c2) (.weight c2)) + proposed-weight)] + more)) + ;; emit c1, continue with rest + (recur (conj result c1) + (+ w-so-far (.weight ^Centroid c1)) + (into [c2] more))))))))) + +(defn- merge-new-values + "Merges any buffered points into the digest." + [{:keys [temp-centroids centroids] :as ^TDigest digest}] + (if (seq temp-centroids) + (let [sorted-centroids (sort-by :mean temp-centroids) + total-weight (.total-weight digest) + unmerged-weight (.unmerged-weight digest) + compression (.compression digest) + new-total-weight (+ total-weight unmerged-weight) + merged-centroids (merge-centroids + compression + (sort-by :mean (into sorted-centroids centroids)) + new-total-weight + (:scale digest))] + (assoc digest + :centroids (have vector? merged-centroids) + :temp-centroids [] + :total-weight new-total-weight + :unmerged-weight 0.0)) + digest)) + +(defn add-point + "Adds a single value with weight to the digest" + ([^Centroid digest value] + (add-point digest value 1.0)) + ([{:keys [temp-centroids] :as ^TDigest digest} + ^double value + ^double weight] + (when (Double/isNaN value) + (throw (ex-info "Cannot add NaN to t-digest" {:value value}))) + (let [buffer-size (.buffer-size digest) + ^TDigest digest (if (>= (count temp-centroids) buffer-size) + (merge-new-values digest) + digest) + minimum (.minimum digest) + maximum (.maximum digest) + new-min (if (NaN? minimum) + value + (min value minimum)) + new-max (if (NaN? maximum) + value + (max value maximum))] + (-> digest + (update :temp-centroids conj (->Centroid value weight)) + (update :unmerged-weight + weight) + (assoc :minimum new-min + :maximum new-max))))) + +(defn compress + "Merges any buffered points into the digest." + [digest] + (merge-new-values digest)) + +(defn vfirst + [^IPersistentVector v] + (.nth v 0)) + +(defn vsecond + [^IPersistentVector v] + (.nth v 1)) + +(defn vpeek + [^IPersistentVector v] + (.peek v)) + +;; NOTE clojure's primitive functions can only have four arguments +(defn- relative-pos ^double [^double x0 ^double x1 ^double x] + (/ (- x x0) (- x1 x0))) + +(defn- interpolate-rel + ^double [^double v0 ^double v1 ^double t] + (+ v0 (* (- v1 v0) t))) + +(defmacro interpolate [v0 v1 x0 x1 x] + `(interpolate-rel ~v0 ~v1 (relative-pos ~x0 ~x1 ~x))) + +(defn quantile + "Returns estimated value at given quantile (0-1). + Returns NaN if digest is empty." + ^double [{:keys [^IPersistentVector centroids] :as ^TDigest digest} + ^double q] + {:pre [(have? #(<= 0.0 % 1.0) q) + (have? digest) + (have? vector? centroids)]} + (let [n (count centroids) + total-weight (.total-weight digest) + minimum (.minimum digest) + maximum (.maximum digest)] + (have finite? 
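+    ;; Strategy: handle empty/singleton digests and the extreme tails
+    ;; specially, then walk the centroids accumulating weight until the
+    ;; target index is bracketed and interpolate between the two means.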
total-weight) + (cond* + ;; no centroids or single centroid + (= n 0) Double/NaN + + :let [^Centroid first-centroid (vfirst centroids)] + (= n 1) (.mean first-centroid) + + ;; multiple centroids + :else + (let [index (* q total-weight)] + (cond* + ;; Boundaries return min/max + (< index 1.0) + minimum + + (> index (- total-weight 1.0)) + maximum + + ;; left centroid interpolation + :let [first-weight (.weight first-centroid)] + (and (> first-weight 1.0) + (< index (/ first-weight 2.0))) + (interpolate + minimum + (.mean first-centroid) + 0.0 + (/ first-weight 2.0) + index) + + ;; right centroid interpolation + :let [^Centroid last-centroid (.peek centroids) + last-weight (.weight last-centroid)] + (and (> last-weight 1) + (<= (- total-weight index) + (/ last-weight 2.0))) + (interpolate + (.mean last-centroid) + maximum + (- total-weight last-weight) + total-weight + index) + + ;; interpolate between centroids + :else + (loop [weight-so-far (/ first-weight 2.0) + centroids centroids] + (let [^Centroid c1 (vfirst centroids) + ^Centroid c2 (vsecond centroids) + dw (/ (+ (.weight c1) (.weight c2)) 2) + nextw (+ weight-so-far dw)] + (if (<= nextw index) + (recur nextw (subvec centroids 1)) + ;; centroids c1 and c2 bracket our point + (let [left-unit? (= (.weight c1) 1.0) + right-unit? (= (.weight c2) 1.0)] + (cond + (and left-unit? (< (- index weight-so-far) 0.5)) + (.mean c1) + (and right-unit? (<= (- nextw index) 0.5)) + (.mean c2) + :else + (let [z1 (- index weight-so-far) + z2 (- (+ weight-so-far dw) index)] + (weighted-average + (.mean c1) z2 + (.mean c2) z1)))))))))))) + +(defn interpolate-centroids + ^double [^Centroid left ^Centroid right ^double x] + (let [left-weight (.weight left) + right-weight (.weight right) + left-single? (= left-weight 1.0) + right-single? (= right-weight 1.0)] + ;; For singleton centroids, their entire weight is exactly at + ;; the centroid and thus shouldn't be interpolated. + (if (and left-single? right-single?) + 0.5 + (let [left-x (.mean left) + right-x (.mean right) + base (if left-single? 0.5 0.0) + dw (/ (+ (if left-single? 0.0 left-weight) + (if right-single? 0.0 right-weight)) + 2.0)] + (+ base (* dw (/ (- x left-x) (- right-x left-x)))))))) + +(defn cdf + "Returns cumulative probability at x. + Returns NaN if digest is empty." + ^double [{:keys [centroids] :as ^TDigest digest} ^double x] + (when (or (Double/isNaN x) (Double/isInfinite x)) + (throw (ex-info "Invalid value" {:x x}))) + + (let [n (count centroids) + minimum (.minimum digest) + maximum ( .maximum digest) + total-weight (.total-weight digest)] + (cond* + (zero? 
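+     ;; cond* clauses: empty digest, single centroid, values outside
+     ;; [minimum, maximum], the two tails, then interpolation between
+     ;; bracketing centroids.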
n) Double/NaN + + (= 1 n) ; single centroid case + (let [width (- maximum minimum)] + (cond + (< x minimum) 0.0 + (> x maximum) 1.0 + (<= width 0.0) 0.5 ; min ≈ max + :else (/ (- x minimum) width))) + + (< x minimum) 0.0 + (> x maximum) 1.0 + + :let [^Centroid first-centroid (vfirst centroids) + first-mean (.mean first-centroid)] + + ;; Left tail + (< x first-mean) + (if (> first-mean minimum) + (if (= x minimum) + (/ 0.5 total-weight) + (/ (interpolate + 0.0 + (/ (.weight first-centroid) 2.0) + minimum + first-mean + x) + total-weight)) + 0.0) + + :let [^Centroid last-centroid (vpeek centroids) + last-mean (.mean last-centroid)] + + ;; Right tail + (> x last-mean) + (if (> maximum last-mean) + (if (= x maximum) + (- 1.0 (/ 0.5 total-weight)) + (- 1.0 + (/ (interpolate + (- total-weight (/ (.weight last-centroid) 2.0)) + total-weight + last-mean + maximum + x) + total-weight))) + 1.0) + + :else + ;; Main interpolation between centroids + (loop [weight-so-far (/ (.weight first-centroid) 2) + centroids centroids] + (let [^Centroid c1 (vfirst centroids)] + (cond* + (= (.mean c1) x) + ;; Handle exact match + (let [more (when (> (count centroids) 2) + (subvec centroids 2)) + ^double dw (loop [w (.weight c1) + cs more] + (let [^Centroid c (and (pos? (count cs)) + (vfirst cs))] + (if (and c (= (.mean c) x)) + (recur (+ w (.weight c)) (subvec cs 1)) + w)))] + (/ (+ weight-so-far (/ dw 2)) total-weight)) + + :let [^Centroid c2 (vsecond centroids)] + (and (<= (.mean c1) x) (< x (.mean c2))) + ;; Interpolate between c1 and c2 + (/ (+ weight-so-far + (interpolate-centroids c1 c2 x)) + total-weight) + + :else + (let [dw (/ (+ (.weight c1) (.weight c2)) 2.0)] + (recur (+ weight-so-far dw) (subvec centroids 1))))))))) + +(defn compressed? + [{:keys [temp-centroids] :as _digest}] + (empty temp-centroids)) + +(defn- transform-centroid + [f ^Centroid centroid] + (update centroid :mean f)) + +(defn transform + [{:keys [buffer-size + compression + centroids] + :as ^TDigest digest} f] + (have compressed? 
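+  ;; transform expects a compressed digest; it rescales every centroid
+  ;; mean as well as the recorded minimum and maximum with f, leaving the
+  ;; weights untouched.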
digest) + (->TDigest + (double compression) + (mapv (partial transform-centroid f) centroids) ; centroids + [] ; temp-centroids + (.total-weight digest) ; total-weight + 0.0 ; unmerged-weight + (f (.minimum digest)) ; minimum + (f (.maximum digest)) ; maximum + scale/k2 + buffer-size)) + +(defn sample-count + ^double [^TDigest digest] + (.total-weight digest)) + +(defn minimum + ^double [^TDigest digest] + (.minimum digest)) + +(defn maximum + ^double [^TDigest digest] + (.maximum digest)) + +(defn mean + ^double [{:keys [centroids] :as ^TDigest digest}] + (let [sum-weights (.total-weight digest) + weighted-sum (util/reduce-double-vector + (fn ^double [^double acc ^Centroid centroid] + (+ acc (* (.mean centroid) (.weight centroid)))) + 0.0 + centroids)] + (/ weighted-sum sum-weights))) + +(defn variance + (^double [digest] + (variance digest (mean digest))) + (^double [{:keys [centroids] :as ^TDigest digest} ^double mean] + (let [sum-weights (.total-weight digest) + sum-squares (util/reduce-double-vector + (fn ^double [^double acc ^Centroid centroid ] + (+ acc + (* (* (.mean centroid) + (.mean centroid)) + (.weight centroid)))) + 0.0 + centroids) + e-x-squared (/ sum-squares sum-weights)] + (- e-x-squared (* mean mean))))) + +(defn centroid-means + [digest] + (mapv centroid-mean (:centroids digest))) diff --git a/bases/criterium/src/criterium/util/t_digest/scale.clj b/bases/criterium/src/criterium/util/t_digest/scale.clj new file mode 100644 index 0000000..179e5d8 --- /dev/null +++ b/bases/criterium/src/criterium/util/t_digest/scale.clj @@ -0,0 +1,199 @@ +(ns criterium.util.t-digest.scale + "Scale functions for t-digest algorithm. + These control how cluster sizes are determined and affect accuracy in different ways." + (:require [criterium.util.invariant :refer [have?]])) + +(defn finite? [^double x] + (and (not (NaN? x)) (not (infinite? x)))) + +(definterface Scale + ;; "Convert quantile q to k-scale value using normalized compression." + (k [ ^double q ^double normalizer]) + (k [ ^double q ^double compression ^double n]) + ;; "Convert k-scale value back to quantile q using normalized compression." + (q [ ^double k ^double normalizer]) + (q [ ^double k ^double compression ^double n]) + ;; "Maximum allowed cluster size at quantile q using normalized compression." + (max_size [ ^double q ^double normalizer]) + (max_size[ ^double q ^double compression ^double n]) + ;; "Normalizing factor for compression." + (normalizer [ ^double compression ^double n])) + +(defn k + "Convert quantile q to k-scale value using normalized compression." + (^double [^Scale scale ^double q ^double normalizer] + {:pre [(have? finite? q)(have? finite? normalizer)] + :post [#(have? finite? %)]} + (.k scale q normalizer)) + (^double [^Scale scale ^double q ^double compression ^double n] + {:pre [(have? finite? q)(have? finite? compression)(have? finite? n)] + :post [#(have? finite? %)]} + (.k scale q compression n))) + +(defn q + "Convert k-scale value back to quantile q using normalized compression." + (^double [^Scale scale ^double k ^double normalizer] + {:pre [(have? finite? k)(have? finite? normalizer)] + :post [#(have? finite? %)]} + (.q scale k normalizer)) + (^double [^Scale scale ^double k ^double compression ^double n] + {:pre [(have? finite? k)(have? finite? compression)(have? finite? n)] + :post [#(have? finite? %)]} + (.q scale k compression n))) + +(defn max-size + "Maximum allowed cluster size at quantile q using normalized compression." 
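+  ;; For any scale function, mapping a quantile to the k-scale and back is
+  ;; (approximately) the identity within the clamped range, e.g. with the
+  ;; k2 scale defined below (values illustrative):
+  ;;   (let [norm (normalizer k2 100.0 1000.0)]
+  ;;     (q k2 (k k2 0.25 norm) norm))   ; ~ 0.25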
+ (^double [^Scale scale ^double q ^double normalizer] + {:pre [(have? finite? q)(have? finite? normalizer)] + :post [#(have? finite? %)]} (.max_size scale q normalizer)) + (^double [^Scale scale ^double q ^double compression ^double n] + {:pre [(have? finite? q)(have? finite? compression)(have? finite? n)] + :post [#(have? finite? %)]} + (.max_size scale q compression n))) + +(defn normalizer + ;; "Normalizing factor for compression." + ^double [^Scale scale ^double compression ^double n] + {:pre [(have? finite? compression)(have? finite? n)] + :post [#(have? finite? %)]} + (.normalizer scale compression n)) + +(defn limit ^double [^double x ^double low ^double high] + (cond + (< x low) low + (> x high) high + :else x)) + +(defn bound [^double x] +(cond + (< x 0.0) 0.0 + (> x 1) 1 + :else x)) + +(def k0 + "Scale function that generates uniform cluster sizes. + Used mainly for testing and comparison." + (reify Scale + (k [_ q normalizer] + (* normalizer q)) + (k [_ q compression _n] + (/ (* compression q) 2.0)) + + (q [_ k normalizer] + (/ k normalizer)) + (q [_ k compression _] + (/ (* 2 k) compression)) + + (max_size [_ _ normalizer] + (/ 1.0 normalizer)) + (max-size [_ _ compression _n] + (/ 2.0 compression)) + + (normalizer [_ compression _n] + (/ compression 2.0)))) + +(def ^:const k1-q-limit-low 1e-15) +(def ^:const k1-q-limit-high (- 1.0 1e-15)) + +(def ^:const k1-k-limit-f-low (/ (Math/asin (- (* 2.0 k1-q-limit-low) 1.0)) + (* 2.0 Math/PI))) +(def ^:const k1-q-limit-f-high (/ (Math/asin (- (* 2.0 k1-q-limit-high) 1.0)) + (* 2.0 Math/PI))) + +(def ^:const k1-x-limit-low (Math/asin (- (* 2.0 k1-q-limit-low) 1.0))) +(def ^:const k1-x-limit-high (Math/asin (- (* 2.0 k1-q-limit-high) 1.0))) + +(def k1 + "Scale function that generates cluster sizes proportional to sqrt(q*(1-q)). + Gives constant relative accuracy if accuracy is proportional to + squared cluster size." + (reify Scale + (k [_ q compression _n] + (let [q (limit q k1-q-limit-low k1-q-limit-high)] + (/ (* compression (Math/asin (- (* 2.0 q) 1.0))) + (* 2.0 Math/PI)))) + (k [_ q normalizer] + (let [q (limit q k1-q-limit-low k1-q-limit-high)] + (* normalizer (Math/asin (- (* 2.0 q) 1.0))))) + + (q [_ k compression n] + (let [k (limit + k + (* compression k1-q-limit-f-high) + (* compression k1-q-limit-high))] + (/ (+ 1.0 (Math/sin (* k (/ (* 2.0 Math/PI) compression))) 1.0) 2.0) )) + + (q [_ k normalizer] + (let [x (/ k normalizer) + x (limit x k1-x-limit-low k1-x-limit-high)] + (/ (+ (Math/sin x) 1.0) 2.0))) + + (max-size [_ q compression _n] + (if (or (<= q 0.0) (>= q 1.0)) + 0.0 + (* 2.0 + (Math/sin (/ Math/PI compression)) + (Math/sqrt (* q (- 1.0 q)))))) + (max-size [_ q normalizer] + (if (or (<= q 0.0) (>= q 1.0)) + 0.0 + (* 2.0 + (Math/sin (/ 0.5 normalizer)) + (Math/sqrt (* q (- 1.0 q)))))) + + (normalizer [_ compression _n] + (/ compression (* 2.0 Math/PI))))) + +;; (def ^:const xx (Math/asin (- (* 2.0 1e-15) 1.0))) + +;; (Math/asin -1.0) +;; (/ Math/PI 2) +;; (doseq [^double c (range 1.0 10.0 1.0)] +;; (prn) +;; (prn :c c) +;; (prn :k-limit (k k1 0.0 c)) +;; (prn :q-for-k-limit (q k1 (k k1 0.0 c) c)) +;; (prn :k-limit' (/ c 4.0)) +;; (prn :xx (/ c xx))) + + +(defn- zk2 ^double [^double compression ^double n] + (+ (* 4.0 (Math/log (/ n compression)) ) 24.0)) + +(def k2 + "Scale function that generates cluster sizes proportional to q*(1-q). + Makes tail error bounds tighter than K1." 
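+  ;; k2: k(q) = δ·ln(q / (1 − q)) / Z(δ, n) with Z(δ, n) = 4·ln(n/δ) + 24,
+  ;; matching zk2 above; the inverse q(k) recovers q via the logistic
+  ;; function.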
+ (reify Scale + (k [_ q compression n] + (if (<= n 1.0) + (cond + (<= q 0.0) -10.0 + (>= q 1.0) 10.0 + :else 0.0) + (let [q (limit q k1-q-limit-low k1-q-limit-high)] + (/ (* compression (Math/log (/ q (- 1.0 q))) ) + (zk2 compression n))))) + (k [_ q normalizer] + (let [q (limit q k1-q-limit-low k1-q-limit-high)] + (* normalizer (Math/log (/ q (- 1.0 q))) ) )) + + (q [_ k compression n] + (let [w (Math/exp (/ (* k (zk2 compression n)) compression))] + (if (infinite? w) + 1.0 + (/ w (+ 1.0 w))))) + + (q [_ k normalizer] + (let [w (Math/exp (/ k normalizer))] + (if (infinite? w) + 1.0 + (/ w (+ 1.0 w))))) + + (max-size [_ q compression n] + (/ (* (zk2 compression n) q (- 1.0 q)) compression)) + + (max-size [_ q normalizer] + (/ (* q (- 1.0 q)) normalizer)) + + (normalizer [_ compression n] + (/ compression (zk2 compression n))))) diff --git a/bases/criterium/src/criterium/util/units.clj b/bases/criterium/src/criterium/util/units.clj new file mode 100644 index 0000000..d668b26 --- /dev/null +++ b/bases/criterium/src/criterium/util/units.clj @@ -0,0 +1,8 @@ +(ns criterium.util.units) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def ^Long NANOSEC-NS 1) +(def ^Long MICROSEC-NS 1000) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def ^Long MILLISEC-NS 1000000) +(def ^Long SEC-NS 1000000000) diff --git a/bases/criterium/src/criterium/util/well.clj b/bases/criterium/src/criterium/util/well.clj new file mode 100644 index 0000000..188ab3c --- /dev/null +++ b/bases/criterium/src/criterium/util/well.clj @@ -0,0 +1,79 @@ +;; Copyright (c) Hugo Duncan. All rights reserved. + +;; The use and distribution terms for this software are covered by the +;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +;; which can be found in the file epl-v10.html at the root of this distribution. +;; By using this software in any fashion, you are agreeing to be bound by +;; the terms of this license. +;; You must not remove this notice, or any other, from this software. + +;; Improved Long-Period Generators Based on Linear Recurrences Modulo 2 +;; F. Panneton, P. L'Ecuyer and M. Matsumoto +;; http://www.iro.umontreal.ca/~panneton/WELLRNG.html + +(ns criterium.util.well) + + +;; Macros to help convert unsigned algorithm to our implementation with signed +;; integers. +;; unsign is used to convert the [0.5,-0.5] range back onto [1,0] + +(defmacro bit-shift-right-ns + "A bit shift that doesn't do sign extension." + [a b] + `(let [n# ~b] + (if (neg? n#) + (bit-shift-left ~a (- n#)) + (bit-and + (bit-shift-right Integer/MAX_VALUE (dec n#)) + (bit-shift-right ~a n#))))) + +(defmacro unsign + "Convert a result based on a signed integer, and convert it to what it would + have been for an unsigned integer." + [x] + `(let [v# ~x] + (if (neg? v#) (+ 1 v#) v#))) + +(defn int-max ^long [] (bit-or (bit-shift-left Integer/MAX_VALUE 1) 1)) + +(defmacro limit-bits [x] + `(bit-and (int-max) ~x)) + +(defmacro mat0-pos [t v] + `(let [v# ~v] (bit-xor v# (bit-shift-right v# ~t)))) + +(defmacro mat0-neg [t v] + `(let [v# ~v] + (bit-xor v# (limit-bits (bit-shift-left v# (unchecked-negate ~t)))))) + +(defmacro add-mod-32 [a b] + `(bit-and (unchecked-add ~a ~b) 0x01f)) + +(defn well-rng-1024a + "Well RNG 1024a + See: Improved Long-Period Generators Based on Linear Recurrences Modulo 2 + F. Panneton, P. L'Ecuyer and M. 
Matsumoto + http://www.iro.umontreal.ca/~panneton/WELLRNG.html" + ([] (well-rng-1024a + (long-array 32 (repeatedly 32 #(rand-int Integer/MAX_VALUE))) + (rand-int 32))) + ([^longs state ^long index] + {:pre [(<= 0 index 31)]} + (let [m1 3 + m2 24 + m3 10 + fact 2.32830643653869628906e-10 + new-index (add-mod-32 index 31) + z0 (aget state new-index) + z1 (bit-xor (aget state index) + (mat0-pos 8 (aget state (add-mod-32 index m1)))) + z2 (bit-xor (mat0-neg -19 (aget state (add-mod-32 index m2))) + (mat0-neg -14 (aget state (add-mod-32 index m3))))] + (aset state index (bit-xor z1 z2)) + (aset state new-index + (bit-xor (bit-xor (mat0-neg -11 z0) (mat0-neg -7 z1)) + (mat0-neg -13 z2))) + (lazy-seq + (cons (unsign (* (aget state new-index) fact)) + (well-rng-1024a state new-index)))))) diff --git a/bases/criterium/src/criterium/util/ziggurat.clj b/bases/criterium/src/criterium/util/ziggurat.clj new file mode 100644 index 0000000..6261bc2 --- /dev/null +++ b/bases/criterium/src/criterium/util/ziggurat.clj @@ -0,0 +1,95 @@ +(ns criterium.util.ziggurat + (:require + [criterium.util.well :as well])) + +(def ^:dynamic ^Long *zignor-c* 128) ; "Number of blocks." +;; "Start of the right tail" (R * phi(R) + Pr(X>=R)) * sqrt(2\pi) +(def ^:dynamic ^Double *zignor-r* 3.442619855899e0) +(def ^:dynamic ^Double *zignor-v* 9.91256303526217e-3) + +(defmacro sqr [x] `(let [x# ~x] (* x# x#))) + +(defn zignor-init + "Initialise tables." + [c r v] + (let [c (int c) + r (double r) + v (double v) + #^doubles s-adzigx (double-array (inc c)) + #^doubles s-adzigr (double-array c) + f (Math/exp (* -0.5e0 r r))] + (aset s-adzigx 0 (/ v f)) ;; [0] is bottom block: V / f(R) + (aset s-adzigx 1 r) + (aset s-adzigx c (double 0.0)) + (loop [i (int 2) + f f] + (aset s-adzigx i + (Math/sqrt (* -2e0 (Math/log (+ (/ v (aget s-adzigx (dec i))) f))))) + (when (< i c) + (recur + (inc i) + (Math/exp (* -0.5e0 (sqr (aget s-adzigx i))))))) + + (for [#^Integer i (range c)] + (let [j (int i)] + (aset s-adzigr j (/ (aget s-adzigx (inc j)) (aget s-adzigx j))))) + [s-adzigr s-adzigx r (dec c)])) + +(defn random-normal-zig + "Pseudo-random normal variates. + An implementation of ZIGNOR + See: + An improved Ziggurat method to generate normal random samples, Doornik, 2005" + ([] + (random-normal-zig (well/well-rng-1024a) + (zignor-init *zignor-c* *zignor-r* *zignor-v*))) + ([rng-seq] + (random-normal-zig rng-seq (zignor-init *zignor-c* *zignor-r* *zignor-v*))) + ([rng-seq c r v] (random-normal-zig rng-seq (zignor-init c r v))) + ([c r v] + (random-normal-zig (well/well-rng-1024a) (zignor-init c r v))) + ([rng-seq [#^doubles s-adzigr #^doubles s-adzigx ^Double zignor-r mask]] + (letfn [(random-normal-tail + [^double min-r negative rng-seq] + (loop [rng-seq rng-seq] + (let [l (Math/log ^Double (first rng-seq)) + x (/ l min-r) + y (Math/log (first (next rng-seq)))] + (if (>= (* -2e0 y) (* x x)) + (if negative + [(- x min-r) (drop 2 rng-seq)] + [(- min-r x) (drop 2 rng-seq)]) + (recur (drop 2 rng-seq))))))] + (let [zignor-r (double zignor-r) + mask (int mask) + [deviate rng-seq] + (loop [rng-seq rng-seq] + (let [r (double (first rng-seq)) + u (double (- (* 2e0 r) 1e0)) + i (bit-and + (int (* Integer/MAX_VALUE + (double ^Double (first (drop 1 rng-seq))))) + mask)] + ;; first try the rectangular boxes + (if (< (Math/abs u) (aget s-adzigr i)) + [(* u (aget s-adzigx i)) (drop 2 rng-seq)] + + ;; bottom box: sample from the tail + (if (zero? i) + (random-normal-tail zignor-r (neg? u) (drop 2 rng-seq)) + + ;; is this a sample from the wedges? 
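+                   ;; Accept x = u * X[i] when f1 + U*(f0 - f1) < 1.0, i.e.
+                   ;; when a fresh uniform deviate falls under the normal
+                   ;; density at x (the ZIGNOR wedge test from Doornik's
+                   ;; paper cited above); otherwise reject and draw again.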
+ (let [x (* u (aget s-adzigx i)) + f0 (Math/exp + (* -0.5e0 + (- (sqr (aget s-adzigx i)) (sqr x)))) + f1 (Math/exp + (* -0.5e0 + (- (sqr (aget s-adzigx (inc i))) + (sqr x))))] + (if (< (+ f1 (* (double ^Double (first (drop 2 rng-seq))) + (- f0 f1))) + 1.0) + [x (drop 3 rng-seq)] + (recur (drop 3 rng-seq))))))))] + (lazy-seq (cons deviate (random-normal-zig rng-seq))))))) diff --git a/bases/criterium/src/criterium/view.clj b/bases/criterium/src/criterium/view.clj new file mode 100644 index 0000000..395b1a3 --- /dev/null +++ b/bases/criterium/src/criterium/view.clj @@ -0,0 +1,72 @@ +(ns criterium.view + (:require + [criterium.util.debug :as debug] + [criterium.util.invariant :refer [have]])) + +(defn def-multi-view* [n] + (let [mm-name (symbol (str (name n) "*"))] + `(do + (ns-unmap *ns* '~mm-name) + (defmulti ~mm-name + (fn [viewer# options# data-map#] + (have keyword? viewer#))) + (defn ~n + ([] (~n {})) + ([options#] + (fn ~n [viewer# data-map#] + (debug/dtap> {:view '~n}) + (~mm-name viewer# options# data-map#))))))) + +(defmacro def-multi-view [n] + (def-multi-view* n)) + +(def-multi-view bootstrap-stats) +(def-multi-view event-stats) +(def-multi-view final-gc-warnings) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view histogram) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view metrics) +(def-multi-view os) +(def-multi-view outlier-counts) +(def-multi-view outlier-significance) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view quantiles) +(def-multi-view runtime) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view sample-percentiles) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view sample-diffs) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view collect-plan) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def-multi-view samples) +(def-multi-view stats) + +(defmulti flush-viewer (fn [viewer] viewer)) +(defmethod flush-viewer :default [_]) + +;; Null Viewer + +(defmethod bootstrap-stats* :none [_ _ _]) +(defmethod event-stats* :none [_ _ _]) +(defmethod final-gc-warnings* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod histogram* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod metrics* :none [_ _ _]) +(defmethod os* :none [_ _ _]) +(defmethod outlier-counts* :none [_ _ _]) +(defmethod outlier-significance* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod quantiles* :none [_ _ _]) +(defmethod runtime* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod sample-percentiles* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod sample-diffs* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod collect-plan* :none [_ _ _]) +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defmethod samples* :none [_ _ _]) +(defmethod stats* :none [_ _ _]) diff --git a/bases/criterium/src/criterium/viewer/common.clj b/bases/criterium/src/criterium/viewer/common.clj new file mode 100644 index 0000000..ebd7ef6 --- /dev/null +++ b/bases/criterium/src/criterium/viewer/common.clj @@ -0,0 +1,196 @@ +(ns criterium.viewer.common + (:require + [clojure.string :as str] + [criterium.metric :as metric] + [criterium.util.format :as format] + [criterium.util.helpers :as util] + [criterium.util.histogram :as histogram] + [criterium.util.invariant :refer [have have?]])) + +(defn 
metrics-map + [sample metrics] + (reduce + (fn [res metric] + (conj res + {:metric (:label metric) + :value (format/format-value + (:dimension metric) + (* (double (first (sample (:path metric)))) + (double (:scale metric))))})) + [] + metrics)) + +(defn stats-map + [stats metric-configs transforms] + (reduce + (fn [res metric] + (let [stat (util/transform-vals-> + (get-in stats (:path metric)) + transforms) + min-val (double (:min-val stat)) + metric-scale (double (:scale metric)) + [scale label] (format/scale + (:dimension metric) + (* metric-scale min-val)) + scale (* (double scale) metric-scale)] + (conj res + (reduce + (fn add-key-k [res k] + (assoc res k + (format/round (* (double (get stat k)) scale) 4))) + {:_metric (str (:label metric) " " label)} ; underscore so it sorts first + [:mean :min-val :mean-minus-3sigma :mean-plus-3sigma :max-val])))) + [] + (filterv (metric/type-pred :quantitative) metric-configs))) + +(defn composite-key [path] + (keyword (str/join "-" (mapv name path)))) + +(defn event-stats-metrics + [event-stats k metric ms] + {:post [(have? (some-fn nil? map?) %)]} + (let [sample-count-path (conj (pop (:path (first ms))) :sample-count) + sample-count (event-stats sample-count-path)] + (when (and sample-count (pos? (long sample-count))) + (reduce + (fn [res m] + (assoc res + (composite-key (rest (:path m))) + (format/format-value + (:dimension (have :dimension m)) + (* (double (get event-stats (:path m))) + (double (:scale m)))))) + {:metric (:label metric)} + (into [{:path sample-count-path + :dimension :count + :scale 1}] + ms))))) + +(defn event-stats + [metrics-defs ev-stats] + {:pre [ev-stats] + :post [(have? vector? %)]} + (reduce-kv + (fn [res k metric] + (if-let [groups (:groups metric)] + (into res (event-stats groups ev-stats)) + (if-let [m (event-stats-metrics ev-stats k metric (:values metric))] + (conj res m) + res))) + [] + metrics-defs)) + +(defn quantiles + [metric-configs all-quantiles transforms] + {:pre [(have? all-quantiles)]} + (reduce + (fn [res metric-config] + (let [quantiles (get-in all-quantiles (:path metric-config)) + median-val (double + (util/transform-sample-> (quantiles 0.5) transforms)) + metric-scale (double (:scale metric-config)) + [scale unit] (format/scale + (:dimension metric-config) + (* metric-scale median-val)) + scale (* (double scale) metric-scale)] + (conj res + (reduce-kv + (fn [res q v] + (assoc + res + q + (format/round + (* (util/transform-sample-> (double v) transforms) scale) + 4))) + {:metric (str (:label metric-config) " " unit)} + quantiles)))) + [] + metric-configs)) + +(defn outlier-counts + [metrics outliers] + (reduce + (fn [res metric] + (let [mcs (:outlier-counts (get-in outliers (:path metric)))] + (if (some pos? 
(vals mcs)) + (conj res (assoc mcs :_metric (:label metric))) + res))) + [] + metrics)) + +(defn- sampled-scheme-data + [sampled] + (when sampled + (assoc + (select-keys sampled [:batch-size :num-samples]) + :num-evals + (* (long (:num-samples sampled)) (long (:batch-size sampled)))))) + +(defn collect-plan-data + [bench-map] + (let [samples-schema (sampled-scheme-data + (-> bench-map :samples)) + warmup-scheme (sampled-scheme-data + (some-> bench-map :warmup)) + estimation-scheme (sampled-scheme-data + (some-> bench-map :estimation))] + (cond-> [(merge {:phase :sample} samples-schema)] + warmup-scheme + (conj (merge {:phase :warmup} warmup-scheme)) + estimation-scheme + (conj (merge {:phase :estimation} estimation-scheme))))) + +(defn column-data->maps + "Convert data where each column's values are stored in vectors." + [column-data column-keys column-tforms] + (let [make-row (fn [& vals] + (zipmap + column-keys + (mapv + (fn [k v] ((column-tforms k identity) v)) + column-keys vals)))] + (apply mapv make-row (map column-data column-keys)))) + +(defn- remove-outliers + [samples outliers] + (into [] (comp + (map-indexed + (fn [i s] (when-not (outliers i) s))) + (filter some?)) + samples)) + +(defn histogram + [metric->values quantiles outliers transforms metric-config] + (try + (let [p (:path metric-config) + iqr (when-let [qs (get-in quantiles p)] + (- (double (get qs 0.75)) (double (get qs 0.25)))) + samples (metric->values p) + outliers (get-in outliers p) + samples (if-let [ols (:outliers outliers)] + (remove-outliers samples ols) + samples) + res (histogram/histogram samples iqr) + transform #(util/transform-sample-> % transforms) + min-val (double (transform (:min res))) + metric-scale (double (:scale metric-config)) + [scale unit] (format/scale + (:dimension metric-config) + (* metric-scale min-val)) + scale (* (double scale) metric-scale) + round #(format/round % 4) + t-center (comp round (partial * scale) transform) + t-density #(format/round % 3)] + (-> res + (update :centers #(mapv t-center %)) + (update :min t-center) + (update :max t-center) + (update :density #(mapv t-density %)) + (assoc + :metric-config metric-config + :unit unit))) + (catch clojure.lang.ExceptionInfo e + (let [data (ex-data e)] + (when-not (#{:histogram/no-values :histogram/same-values} + (:error data)) + (throw e)))))) diff --git a/bases/criterium/src/criterium/viewer/portal.clj b/bases/criterium/src/criterium/viewer/portal.clj new file mode 100644 index 0000000..0ec8a09 --- /dev/null +++ b/bases/criterium/src/criterium/viewer/portal.clj @@ -0,0 +1,488 @@ +(ns criterium.viewer.portal + "A viewer that outputs to portal using tap>." + (:refer-clojure :exclude [flush]) + (:require + [criterium.metric :as metric] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.util.probability :as probability] + [criterium.view :as view] + [criterium.viewer.common :as viewer-common])) + + +(defonce tapped (atom {:values '()})) + +(defn submit + "Tap target function. + + This allows criterium to control the order of tapped output. + + ```clojure + (def submit (criterium.portal/submit #'portal.api/submit)) + (add-tap #'submit)` + (remove-tap #'submit) + ``" + [portal-submit] + (swap! tapped assoc :portal-submit portal-submit) + (fn + [value] + (swap! 
tapped update :values conj value))) + +(defn flush + "Flush tapped output" + [] + (tap> ::_) + (loop [i 0] + (when (not= ::_ (first (:values @tapped))) + (when (< i 1000) + (Thread/yield) + (recur (unchecked-inc i))))) + + (let [[{:keys [portal-submit values]}] (swap-vals! tapped assoc :values '())] + (doseq [value values] + (when (not= ::_ value) + (portal-submit value))))) + +(defmethod view/flush-viewer :portal [_] + (flush)) + +(defn portal-heading [s] + (tap> (with-meta s {:portal.viewer/default :portal.viewer/hiccup}))) + +(defn portal-table [s] + (tap> (with-meta s {:portal.viewer/default :portal.viewer/table}))) + +(defn portal-vega-lite [s] + (tap> (with-meta + (assoc s :$schema "https://vega.github.io/schema/vega-lite/v5.json") + {:portal.viewer/default :portal.viewer/vega-lite}))) + +(defn heading [s] + (portal-heading [:b s])) + +(defmethod view/metrics* :portal + [_ {:keys [samples-id]} data-map] + (let [samples-id (or samples-id :samples) + metrics-samples (data-map samples-id) + metrics-defs (:metrics-defs metrics-samples) + metric-configs (metric/all-metric-configs metrics-defs)] + (portal-table + (viewer-common/metrics-map + (util/metric->values metrics-samples) + metric-configs)))) + +(defmethod view/stats* :portal + [_ {:keys [stats-id metric-ids]} data-map] + (let [stats-id (or stats-id :stats) + stats-map (data-map stats-id) + metrics-defs (-> (:metrics-defs stats-map) + (metric/select-metrics metric-ids)) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map stats-id)] + (heading "Summary stats") + (portal-table + (viewer-common/stats-map + (util/stats stats-map) + metric-configs + transforms)))) + +(defmethod view/event-stats* :portal + [_ {:keys [event-stats-id]} data-map] + (let [event-stats-id (or event-stats-id :event-stats) + event-stats-map (data-map event-stats-id) + metrics-defs (have (:metrics-defs event-stats-map))] + (heading "Event stats") + (portal-table + (viewer-common/event-stats + metrics-defs + (util/event-stats event-stats-map))))) + +(defmethod view/quantiles* :portal + [_ {:keys [quantiles-id]} data-map] + (let [quantiles-id (or quantiles-id :quantiles) + quantiles-map (data-map quantiles-id) + metrics-defs (:metrics-defs quantiles-map) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map quantiles-id)] + (heading "Quantiles") + (portal-table + (viewer-common/quantiles + metric-configs + (util/quantiles quantiles-map) + transforms)))) + +(defmethod view/outlier-counts* :portal + [_ {:keys [outliers-id] :as _view} data-map] + (let [outliers-id (or outliers-id :outliers) + outliers-map (data-map outliers-id) + metrics-defs (:metrics-defs outliers-map) + metric-configs (metric/all-metric-configs metrics-defs)] + (heading "Outliers") + (portal-table + (viewer-common/outlier-counts + metric-configs + (util/outliers outliers-map))))) + +(defmethod view/outlier-significance* :portal + [_ {:keys [outlier-significance-id] :as _view} data-map] + (let [outlier-sig-id (or outlier-significance-id :outlier-significance) + outlier-sig-map (data-map outlier-sig-id) + outlier-sig (util/outlier-significance outlier-sig-map) + metrics-defs (:metrics-defs outlier-sig-map) + metric-configs (metric/all-metric-configs metrics-defs)] + (heading "Outlier Significance") + (portal-table + (vec + (for [m metric-configs] + (get-in outlier-sig (:path m))))))) + +(defmethod view/collect-plan* :portal + [_ _view data-map] + (heading "Collect plan") + (portal-table + 
(viewer-common/collect-plan-data data-map))) + +(defn metric-layer + [metric->values transforms outliers metric] + {:pre [(have? map? metric->values)]} + (let [path (:path metric) + k (first path) + field-name (name k) + data (mapv + #(let [outlier (get + (:outliers (get-in outliers path)) + %2 + "")] + (assoc + {k + (util/transform-sample-> + %1 + transforms)} + :index %2 + :outlier outlier)) + (have some? + (metric->values path) + {:path path :available (keys metric->values)}) + (range))] + {:data {:values data} + :encoding {:x {:field "index" :type "quantitative"} + :y {:field field-name + :type "quantitative" + :scale {:zero false}} + :tooltip [{:field "index" :type "quantitative"} + {:field field-name :type "quantitative"}] + :color {:field "outlier"}} + :mark "point"})) + +(defn metric-histo-layer + [metric->values transforms outliers metric] + {:pre [metric->values]} + (let [path (:path metric) + k (first path) + field-name (name k) + data (mapv + #(let [outlier (get + (:outliers (get-in outliers path)) + %2 + "")] + (assoc + {k + (util/transform-sample-> + %1 + transforms)} + :index %2 + :outlier outlier)) + (metric->values path) + (range))] + {:data {:values data} + :encoding {:y {:aggregate "count" + :type "quantitative"} + :x {:field field-name + :type "quantitative" + :bin {:maxbins 100} + :scale {:zero false}} + :color {:field "outlier"}} + :mark "bar"})) + +(defn normal-pdf-points + [min-val max-val mean variance transforms] + (let [sigma (Math/sqrt variance) + delta (/ (- (double max-val) (double min-val)) 120) + pdf (probability/normal-pdf mean sigma)] + (mapv + (fn [z] + {:z (util/transform-sample-> z transforms) + :p (pdf z)}) + (range min-val max-val delta)))) + +(defn metric-sample-stats-layer + [transforms stats metric-config] + (let [{:keys [mean-minus-3sigma mean-plus-3sigma mean variance]} + stats + path (:path metric-config) + k (first path) + data (normal-pdf-points + mean-minus-3sigma + mean-plus-3sigma + mean + variance + transforms)] + [{:resolve {:scale {:y "shared"}} + :layer + [{:data {:values data} + :encoding {:x {:field "z" + :type "quantitative" + :scale {:zero false}} + :y {:field "p" + :type "quantitative"} + :tooltip [{:field (name k) + :title (str "Normal")}]} + :mark {:type "line"}} + {:data {:values [{k + (util/transform-sample-> mean transforms) + :title "mean"}]} + :encoding {:x {:field (name k) + :type "quantitative" + :scale {:zero false}} + :tooltip [{:field (name k) + :title (str "Mean " (name k))}]} + :mark "rule"}]}])) + +(defn event-occurance [events metrics index] + (reduce + (fn [res metric-config] + (let [path (:path metric-config) + v (get (get events path) index)] + (if (pos? 
(long v)) + (assoc res (viewer-common/composite-key path) v :index index) + res))) + nil + metrics)) + +(defn event-layer + [events [_k metrics]] + (let [data (->> (map + (partial event-occurance events (:values metrics)) + (range (-> events + (get (:path (first (:values metrics)))) + count))) + (filterv some?))] + (when (seq data) + [{:data {:values data} + :encoding {:x {:field "index" + :type "quantitative" + ;; :title title + } + :color {:vvalue "white"} + :size {:value 2}, + :tooltip (conj + (mapv + #(hash-map + :field (name + (viewer-common/composite-key (:path %))) + :type "quantitative" + :title (str (:label metrics) " " (:label %))) + (:values metrics)) + {:field "index" :type "quantitative"})} + :mark {:type "rule" + :strokeDash [2 2]}}]))) + +(defmethod view/samples* :portal + [_ {:keys [] :as view} data-map] + (let [quant-samples-id (:samples-id view :samples) + event-samples-id (:event-samples-id view quant-samples-id) + outliers-analysis-id (:outliers-id view :outliers) + + quant-samples (data-map quant-samples-id) + event-samples (data-map event-samples-id) + outliers (data-map outliers-analysis-id) + + q-metrics-defs (-> (:metrics-defs quant-samples) + (metric/filter-metrics + (metric/type-pred :quantitative))) + e-metrics-defs (-> (:metrics-defs event-samples) + (metric/filter-metrics + (metric/type-pred :event))) + metric-configs (metric/all-metric-configs q-metrics-defs) + e-metric-configs (metric/all-metric-configs e-metrics-defs) + + transforms (util/get-transforms data-map quant-samples-id)] + (heading "Samples") + (portal-vega-lite + {:data {:values [{}]} + :encoding {:x {:field "index" :type "quantitative"}} + :resolve {:scale {:y "independent"}} + :vconcat + (into + [{:height 800 + :layer + (vec + (into + [(metric-layer + (util/metric->values quant-samples) + transforms + (when outliers (util/outliers outliers)) + (have (first metric-configs)))] + (mapcat + #(event-layer (util/metric->values event-samples) %) + e-metrics-defs)))}] + (mapv + #(metric-layer + (util/metric->values quant-samples) + transforms + outliers %) + e-metric-configs))}))) + +(defmethod view/histogram* :portal + [_ {:keys [samples-id stats-id] :as view} data-map] + (let [stats-id (or stats-id :stats) + quant-samples-id (or samples-id :samples) + outlier-analysis-id (:outlier-id view :outliers) + quant-samples (data-map quant-samples-id) + outlier-analysis (data-map outlier-analysis-id) + stats (data-map stats-id) + metrics-defs (-> (:metrics-defs quant-samples) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map quant-samples-id) + stats-transforms (util/get-transforms data-map (:source-id stats))] + (heading "Histogram") + (portal-vega-lite + {:data {:values []} + :resolve {:scale {:x "independent" :y "independent"}} + :vconcat (mapv + (fn [metric-config] + {:resolve {:scale {:x "shared" :y "independent"}} + :height 800 + :layer + (into + [(metric-histo-layer + (util/metric->values quant-samples) + transforms + (util/outliers outlier-analysis) + metric-config)] + (when stats + (->> + (metric-sample-stats-layer + stats-transforms + (get-in (util/stats stats) (:path metric-config)) + metric-config))))}) + metric-configs)}))) + +(defn metric-percentile-layer + [matric->values transforms metric] + (let [path (:path metric) + k (first path) + field-name (name k) + vs (->> (matric->values path) + (map #(util/transform-sample-> % transforms)) + sort + vec) + n (count vs) + max-val 
(Math/log10 (double n)) + xs (mapv + #(/ (- max-val (Math/log10 (- n (double %)))) max-val) + (range 0 n)) + delta (/ 100.0 (dec n)) + percentiles (take n + (iterate + #(+ delta (double %)) 0)) + data (mapv + #(hash-map k %1 :p %2 :x %3) + vs + percentiles + xs)] + {:data {:values data + :name "vals"} + :height 800 + :encoding + {:x + {:field "x" :type "quantitative" + :scale {:domain [0 1.0]} + :axis + {:labelExpr + ;; inverse of xs + (format + "format( (%d - pow(%d, 1 - min(datum.index, 1.0))) / %d,'.3%%')", + n,n,(dec n)) + :tickCount 10}} + :y {:field field-name + :type "quantitative" + :scale {:zero false + :type "log"}} + :tooltip [{:field "p" :type "quantitative"} + {:field field-name :type "quantitative"}]} + :mark "point"})) + +(defmethod view/sample-percentiles* :portal + [_ {:keys [metric-ids] :as view} data-map] + (let [quant-samples-id (:samples-id view :samples) + quant-samples (data-map quant-samples-id) + metrics-defs (-> (:metrics-defs quant-samples) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map quant-samples-id)] + (heading "Percentiles") + (portal-vega-lite + {:data {:values [{}]} ; for portal + :resolve {:scale {:y "independent"}} + :vconcat + (into + [{:layer + (vec + (into + [(metric-percentile-layer + (util/metric->values quant-samples) + transforms + (first metric-configs))]))}])}))) + +(defn metric-diff-layer + [samples metric] + (let [path (:path metric) + k (first path) + field-name (name k) + vs (->> (get samples path) + sort + vec) + min-v (double (first vs)) + diffs (-> (mapv + #(- (double %) min-v) + vs) + sort + distinct + vec) + data (mapv + #(hash-map k %1 :x %2) + diffs + (range))] + {:data {:values data + :name "vals"} + :height 800 + :encoding + {:x + {:field "x" :type "quantitative"} + :y {:field field-name + :type "quantitative" + :scale {:zero false}} + :tooltip [{:field field-name :type "quantitative"}]} + :mark "point"})) + +(defmethod view/sample-diffs* :portal + [_ {:keys [] :as view} data-map] + (let [quant-samples-id (:samples-id view :samples) + quant-samples (data-map quant-samples-id) + metric-configs (:metric-configs quant-samples)] + (heading "Sample diffs") + (portal-vega-lite + {:data {:values [{}]} ; for portal + :resolve {:scale {:y "independent"}} + :vconcat + (into + [{:layer + (vec + (into + [(metric-diff-layer + (util/metric->values quant-samples) + (first metric-configs))]))}])}))) diff --git a/bases/criterium/src/criterium/viewer/pprint.clj b/bases/criterium/src/criterium/viewer/pprint.clj new file mode 100644 index 0000000..d657112 --- /dev/null +++ b/bases/criterium/src/criterium/viewer/pprint.clj @@ -0,0 +1,232 @@ +(ns criterium.viewer.pprint + "A pretty print viewer" + (:require + [clojure.pprint :as pprint] + [criterium.metric :as metric] + [criterium.util.helpers :as util] + [criterium.view :as view] + [criterium.viewer.common :as viewer-common])) + +(defmethod view/metrics* :pprint + [_ {:keys [samples-id]} data-map] + (let [samples-id (or samples-id :samples) + metrics-samples (data-map samples-id) + metrics-defs (:metrics-defs metrics-samples) + metric-configs (metric/all-metric-configs metrics-defs)] + (pprint/print-table + [:metric :value] + (viewer-common/metrics-map + (util/metric->values metrics-samples) + metric-configs)))) + +(defmethod view/stats* :pprint + [_ {:keys [stats-id metric-ids]} data-map] + (let [stats-id (or stats-id :stats) + stats-map (data-map stats-id) + metrics-defs (-> 
(:metrics-defs stats-map) + (metric/select-metrics metric-ids)) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map stats-id)] + (pprint/print-table + [:_metric :mean-minus-3sigma :mean :mean-plus-3sigma :min-val :max-val] + (viewer-common/stats-map + (util/stats stats-map) + metric-configs + transforms)))) + +(defmethod view/quantiles* :pprint + [_ {:keys [quantiles-id]} data-map] + (let [quantiles-id (or quantiles-id :quantiles) + quantiles-map (data-map quantiles-id) + metrics-defs (:metrics-defs quantiles-map) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map quantiles-id) + table (viewer-common/quantiles + metric-configs + (util/quantiles quantiles-map) + transforms)] + (pprint/print-table + (into [:metric] + (->> table first keys (filter #(not= % :metric)) sort)) + table))) + +(defmethod view/event-stats* :pprint + [_ {:keys [event-stats-id]} data-map] + (let [event-stats-id (or event-stats-id :event-stats) + event-stats-map (data-map event-stats-id) + metrics-defs (:metrics-defs event-stats-map) + res (viewer-common/event-stats + metrics-defs + (util/event-stats event-stats-map)) + ks (reduce into [] (map keys res))] + (pprint/print-table (distinct ks) res))) + +(defmethod view/outlier-counts* :pprint + [_ {:keys [outliers-id] :as _view} data-map] + (let [outliers-id (or outliers-id :outliers) + outliers-map (data-map outliers-id) + metrics-defs (:metrics-defs outliers-map) + metric-configs (metric/all-metric-configs metrics-defs)] + (pprint/print-table + [:_metric :low-severe :low-mild :high-mild :high-severe] + (viewer-common/outlier-counts + metric-configs + (util/outliers outliers-map))))) + +(defn print-outlier-significances + [{:keys [outlier-significance-id] :as _view} data-map] + (let [outlier-sig-id (or outlier-significance-id :outlier-significance) + outlier-sig-map (data-map outlier-sig-id) + outlier-sig (util/outlier-significance outlier-sig-map) + metrics-defs (:metrics-defs outlier-sig-map) + metric-configs (metric/all-metric-configs metrics-defs)] + (pprint/print-table + (for [m metric-configs] + (get-in outlier-sig (:path m)))))) + +(defmethod view/outlier-significance* :pprint + [_ view data-map] + (print-outlier-significances view data-map)) + +(defn- flatten-events [sample metrics-defs index] + (reduce-kv + (fn [res k metric-group] + (reduce + (fn [res metric-config] + (let [v (get (get sample (:path metric-config)) index)] + (if (pos? 
(long v)) + (assoc res + (viewer-common/composite-key + [(if-let [group (:group metric-config)] + group + k) + (last (:path metric-config))]) + v) + res))) + res + (or (:values metric-group) + (mapcat :values (vals (:groups metric-group)))))) + {} + metrics-defs)) + +(defn- outlier-values [outlier-analysis path index] + (when-let [v (some-> outlier-analysis + (get-in path) + :outliers + (get index))] + [[(viewer-common/composite-key [(last path) :outlier]) + v]])) + +(defmethod view/collect-plan* :pprint + [_ _view data-map] + (pprint/print-table + [:phase :num-samples :batch-size :num-evals] + (viewer-common/collect-plan-data data-map))) + +(defmethod view/samples* :pprint + [_ {:keys [] :as view} banech-map] + (let [quant-samples-id (:samples-id view :samples) + event-samples-id (:event-samples-id view quant-samples-id) + outlier-analysis-id (:outlier-id view :outliers) + quant-samples (banech-map quant-samples-id) + event-samples (banech-map event-samples-id) + outlier-analysis (banech-map outlier-analysis-id) + + metric-defs (metric/filter-metrics + (:metrics-defs quant-samples) + (metric/type-pred :quantitative)) + event-metrics-defs (metric/filter-metrics + (:metrics-defs event-samples) + (metric/type-pred :event)) + + metric-configs (metric/all-metric-configs metric-defs) + event-metric-configs (metric/all-metric-configs event-metrics-defs) + + transforms (util/get-transforms banech-map quant-samples-id) + + quant-ids (mapv (comp last :path) metric-configs) + event-keys (into + [] + (mapcat + (fn [[k metric-group]] + (reduce + (fn [res metric-config] + (conj res + (viewer-common/composite-key + [(if-let [group (:group metric-config)] + group + k) + (last (:path metric-config))]))) + [] + (or (:values metric-group) + (mapcat :values + (vals (:groups metric-group))))))) + event-metrics-defs) + outlier-keys (when outlier-analysis + (mapv + #(viewer-common/composite-key [(last %) :outlier]) + (mapv :path metric-configs))) + + all-keys (reduce into [:index] [quant-ids outlier-keys event-keys]) + data (mapv + (fn [index] + (reduce + merge + {:index index} + [(reduce + (fn [res path] + (into + (assoc res (last path) + (util/transform-sample-> + (get (get quant-samples path) index) + transforms)) + (outlier-values outlier-analysis path index))) + {} + (mapv :path metric-configs)) + (flatten-events + event-samples event-metrics-defs index)])) + (range (-> quant-samples + (get (:path (first metric-configs))) + count)))] + (pprint/print-table all-keys data))) + + +(defmethod view/histogram* :pprint + [_ {:keys [samples-id quantiles-id outliers-id] :as _view} data-map] + (let [samples-id (or samples-id :samples) + quantiles-id (or quantiles-id :quantiles) + outliers-id (or outliers-id :outliers) + metrics-samples (data-map samples-id) + quantiles (data-map quantiles-id) + outliers (data-map outliers-id) + metrics-defs (-> (:metrics-defs metrics-samples) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map samples-id) + histograms (->> metric-configs + (mapv + #(viewer-common/histogram + (util/metric->values metrics-samples) + (util/quantiles quantiles) + (util/outliers outliers) + transforms + %)) + (filterv some?))] + (doseq [h histograms] + (println (format "\nHistogram of %s %s" + (-> h :metric-config :label) + (:unit h))) + (pprint/print-table + [:centers :counts :density] + (viewer-common/column-data->maps + h + [:centers :counts :density] + {:centers #(format "%-7.3g" %) + 
:density #(format "%-7.3g" %)})) + (println))) ) + +(defmethod view/sample-percentiles* :pprint + [_ _view _banch-map] + ;; TODO + ) diff --git a/bases/criterium/src/criterium/viewer/print.clj b/bases/criterium/src/criterium/viewer/print.clj new file mode 100644 index 0000000..0307de9 --- /dev/null +++ b/bases/criterium/src/criterium/viewer/print.clj @@ -0,0 +1,401 @@ +(ns criterium.viewer.print + "A print viewer" + (:require + [clojure.string :as str] + [criterium.jvm :as jvm] + [criterium.metric :as metric] + [criterium.types :as types] + [criterium.util.format :as format] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have have?]] + [criterium.view :as view] + [criterium.viewer.common :as viewer-common])) + +(set! *unchecked-math* false) + +(defn print-metrics + [metrics metrics->values] + (doseq [m metrics] + (when-let [v (first (metrics->values (:path m)))] + (println + (format + "%36s: %s" + (:label m) + (format/format-value (:dimension m) (* v (:scale m)))))))) + +(defmethod view/metrics* :print + [_ {:keys [samples-id]} data-map] + (let [samples-id (or samples-id :samples) + metrics-samples (data-map samples-id) + metrics-defs (:metrics-defs metrics-samples) + metric-configs (metric/all-metric-configs metrics-defs)] + (print-metrics metric-configs (util/metric->values metrics-samples)))) + +(defn print-stat + [metric stat transforms] + (when (:mean stat) + (let [stat (util/transform-vals-> stat transforms) + [scale unit] (format/scale + (:dimension metric) + (* (:scale metric) (:mean stat))) + scale (* scale (:scale metric))] + (println + (format + "%32s: %s %s 3σ [%s %s] min %s" + (:label metric) + (format/format-scaled (:mean stat) scale) + unit + (format/format-scaled (:mean-minus-3sigma stat) scale) + (format/format-scaled (:mean-plus-3sigma stat) scale) + (format/format-scaled (:min-val stat) scale)))))) + +(defn print-stats + [metrics stats transforms] + (doseq [metric metrics] + (print-stat metric (get-in stats (:path metric)) transforms))) + +(defmethod view/stats* :print + [_ {:keys [stats-id metric-ids]} data-map] + (let [stats-id (or stats-id :stats) + stats-map (data-map stats-id) + metrics-defs (-> (:metrics-defs stats-map) + (metric/select-metrics metric-ids)) + metric-configs (metric/all-metric-configs metrics-defs)] + (print-stats + metric-configs + (util/stats stats-map) + (util/get-transforms data-map stats-id)))) + +(defn print-event-stats-metrics + [event-stats metric ms] + (let [sample-count-path + (conj (vec (butlast (:path (first ms)))) :sample-count)] + (when (and sample-count-path + (pos? 
(event-stats sample-count-path))) + (let [vals (mapv + (fn [m] + (format/format-value + (:dimension m) + (* (get event-stats (:path m)) + (:scale m)))) + (conj ms {:path sample-count-path + :dimension :count + :scale 1}))] + (println (apply format (:summary metric) (:label metric) vals)))))) + +(defn print-event-stats + [metrics-defs event-stats] + {:pre [event-stats]} + (doseq [[_k metric] metrics-defs] + (if-let [groups (:groups metric)] + (print-event-stats groups event-stats) + (print-event-stats-metrics event-stats metric (:values metric))))) + +(defmethod view/event-stats* :print + [_ {:keys [event-stats-id]} data-map] + (let [event-stats-id (or event-stats-id :event-stats) + event-stats-map (data-map event-stats-id) + metrics-defs (-> (:metrics-defs event-stats-map) + (metric/filter-metrics + (metric/type-pred :event))) + event-stats (util/event-stats event-stats-map)] + (print-event-stats metrics-defs event-stats))) + +(defn print-bootstrap-stat + [metric + {:keys [mean + mean-minus-3sigma + mean-plus-3sigma] + minval :min-val + :as stat}] + (assert minval stat) + (let [{:keys [dimension label]} metric + [scale units] (format/scale + dimension + (* (:scale metric) (:point-estimate mean))) + min-quantiles (:estimate-quantiles minval) + quantiles (:estimate-quantiles mean) + scale (* (:scale metric) scale)] + (println + (format "%36s: %.3g %s CI [%.3g %.3g] (%.3f %.3f)" + (str label " min") + (* scale (:point-estimate minval)) + units + (* scale (-> min-quantiles first :value)) + (* scale (-> min-quantiles second :value)) + (-> min-quantiles first :alpha) + (-> min-quantiles second :alpha))) + (println + (format "%36s: %.3g %s CI [%.3g %.3g] (%.3f %.3f)" + (str label " mean") + (* scale (:point-estimate mean)) + units + (* scale (-> quantiles first :value)) + (* scale (-> quantiles second :value)) + (-> quantiles first :alpha) + (-> quantiles second :alpha))) + (println + (format "%36s: [%.3g %.3g] %s " + (str label " 3σ") + (* scale (:point-estimate mean-minus-3sigma)) + (* scale (:point-estimate mean-plus-3sigma)) + units)))) + +(defn print-bootstrap-stats + [{:keys [bootstrap-stats-id]} data-map] + (let [bootstrap-stats-id (or bootstrap-stats-id :bootstrap-stats) + bootstrap-map (data-map bootstrap-stats-id) + metrics-defs (:metrics-defs bootstrap-map) + metric-configs (metric/all-metric-configs metrics-defs) + bootstrap (util/bootstrap bootstrap-map)] + (doseq [metric metric-configs] + (when-let [stat (get-in bootstrap (:path metric))] + (print-bootstrap-stat metric stat))))) + +(defmethod view/bootstrap-stats* :print + [_ view data-map] + (print-bootstrap-stats view data-map)) + +(defn print-final-gc-warnings + [{:keys [final-gc-id samples-id warn-threshold]} data-map] + {:pre [(number? 
warn-threshold)]}
+  (let [final-gc-id (or final-gc-id :final-gc)
+        samples-id (or samples-id :samples)
+        metrics-samples (data-map samples-id)
+        metrics-defs (:metrics-defs metrics-samples)
+        gc-metric-configs (metric/all-metric-configs
+                           (select-keys
+                            metrics-defs
+                            [:elapsed-time :garbage-collector]))
+        metric (first gc-metric-configs)
+        gc-time-metrics (->> (next gc-metric-configs)
+                             (filterv #(= :time (:dimension %))))
+        metric->values (util/metric->values metrics-samples)
+        total (* (:scale metric)
+                 (reduce + (metric->values [:elapsed-time])))
+        gc-samples (-> data-map final-gc-id util/metric->values)
+        total-gc (reduce
+                  +
+                  (mapv
+                   (fn [m]
+                     (* (:scale m) (reduce + (gc-samples (:path m)))))
+                   gc-time-metrics))
+        frac (/ total-gc total)]
+    (when (and total-gc (> frac warn-threshold))
+      (println (format "Final GC ran for %s, %.1f%% of total sampling time (%s)"
+                       (format/format-value :time total-gc)
+                       (* frac 100)
+                       (format/format-value :time total))))))
+
+(defmethod view/final-gc-warnings* :print
+  [_ view data-map]
+  (print-final-gc-warnings view data-map))
+
+(defn print-outlier-count
+  [metric-config num-samples outliers]
+  (let [outlier-counts (:outlier-counts outliers)
+        sum (reduce + (vals outlier-counts))]
+    (when (pos? sum)
+      (util/report "%32s: Found %d outliers in %d samples (%.3g %%)\n"
+                   (:label metric-config)
+                   sum
+                   num-samples
+                   (* 100.0 (/ sum num-samples)))
+      (doseq [[c v] (->> outlier-counts
+                         (filter #(pos? (val %))))]
+        (util/report
+         " %12s\t %d (%2.4f %%)\n"
+         (name c) v (* 100.0 (/ v num-samples)))))))
+
+(defn print-outlier-counts
+  [{:keys [outliers-id] :as _view} data-map]
+  (let [outliers-id (or outliers-id :outliers)
+        outliers-map (data-map outliers-id)
+        metrics-defs (:metrics-defs outliers-map)
+        metric-configs (metric/all-metric-configs metrics-defs)
+        num-samples (have (:num-samples outliers-map))
+        outliers (util/outliers outliers-map)]
+    (doseq [m metric-configs]
+      (print-outlier-count m num-samples (get-in outliers (:path m))))))
+
+(defmethod view/outlier-counts* :print
+  [_ view data-map]
+  (print-outlier-counts view data-map))
+
+(defn print-outlier-significance
+  [metric-config outlier-significance]
+  {:pre [(have?
outlier-significance)]} + (let [labels {:unaffected "unaffected" + :slight "slightly inflated" + :moderate "moderately inflated" + :severe "severely inflated"}] + (util/report "%s Variance contribution from outliers : %.3g %%" + (:label metric-config) + (* (:significance outlier-significance) 100.0)) + (util/report "%s Variance is %s by outliers\n" + (:label metric-config) + (-> outlier-significance :effect labels)))) + +(defn print-outlier-significances + [{:keys [outlier-significance-id] :as _view} data-map] + (let [outlier-sig-id (or outlier-significance-id :outlier-significance) + outlier-sig-map (data-map outlier-sig-id) + metrics-defs (-> (:metrics-defs outlier-sig-map) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + outlier-sig (util/outlier-significance outlier-sig-map)] + (doseq [m metric-configs] + (print-outlier-significance + m + (have seq (get-in outlier-sig (:path m)) + {:metric m :outlier-sig outlier-sig}))))) + +(defmethod view/outlier-significance* :print + [_ view data-map] + (print-outlier-significances view data-map)) + +(defn- print-samples-with-outliers + [metric->values transforms outliers metric] + (let [path (:path metric) + values (metric->values path) + outlier-data (get-in outliers path)] + (doseq [[i v] (sort-by first (:outliers outlier-data))] + (println + (format "%36s[%5d] %s %s" + "" + i + (format/format-value + (:dimension metric) + (* (:scale metric) + (util/transform-sample-> (values i) transforms))) + (name v)))))) + +(defmethod view/samples* :print + [_ {:keys [samples-id outliers-id] :as _view} data-map] + (let [samples-id (or samples-id :samples) + outliers-id (or outliers-id :outliers) + metrics-samples (data-map samples-id) + outliers (data-map outliers-id) + metrics-defs (-> (:metrics-defs outliers) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map samples-id)] + + (println + (format "%32s: %d samples with batch-size %d" + "Samples" + (:num-samples metrics-samples) (:batch-size metrics-samples))) + (when outliers + (doseq [metric metric-configs] + (println (format"%36s%s" "" (:label metric))) + (print-samples-with-outliers + (util/metric->values metrics-samples) + transforms + (util/outliers outliers) + metric)) + (println)))) + +(defmethod view/collect-plan* :print + [_ _view data-map] + (let [warmup (some-> data-map :warmup) + est (some-> data-map :estimation) + samples (-> data-map :samples) + fmt "%32s: %d samples with batch-size %d (%d evaluations)"] + (println + (format fmt + "Sample Scheme" + (:num-samples samples) + (:batch-size samples) + (:eval-count samples))) + (when warmup + (println + (format fmt + "Warmup" + (:num-samples warmup) (:batch-size warmup) + (* (:num-samples warmup) (:batch-size warmup))))) + (when est + (println + (format fmt + "Estimation" + (:num-samples est) (:batch-size est) + (* (:num-samples est) (:batch-size est))))))) + + +(defmethod view/histogram* :print + [_ {:keys [samples-id quantiles-id outliers-id] :as _view} data-map] + (let [samples-id (or samples-id :samples) + quantiles-id (or quantiles-id :quantiles) + outliers-id (or outliers-id :outliers) + metrics-samples (data-map samples-id) + quantiles (data-map quantiles-id) + outliers (data-map outliers-id) + metrics-defs (-> (:metrics-defs metrics-samples) + (metric/filter-metrics + (metric/type-pred :quantitative))) + metric-configs (metric/all-metric-configs metrics-defs) + 
transforms (util/get-transforms data-map samples-id) + histograms (->> metric-configs + (mapv + #(viewer-common/histogram + (util/metric->values metrics-samples) + (util/quantiles quantiles) + (util/outliers outliers) + transforms + %)) + (filterv some?))] + (doseq [h histograms] + (println + (format "%32s: %s Histogram" + (-> h :metric-config :label) + (-> h :unit))) + (run! + (fn [[x bin-count density] ] + (println (format "%34s %-7.3f %5d %-7.3g" "" x bin-count density))) + (mapv vector (:centers h) (:counts h) (:density h))) + (println)))) + +(defmethod view/quantiles* :print + [_ {:keys [quantiles-id]} data-map] + (let [quantiles-id (or quantiles-id :quantiles) + quantiles-map (have types/quantiles-map? + (data-map quantiles-id)) + metrics-defs (:metrics-defs quantiles-map) + metric-configs (metric/all-metric-configs metrics-defs) + transforms (util/get-transforms data-map quantiles-id) + table (viewer-common/quantiles + metric-configs + (util/quantiles quantiles-map) + transforms)] + (doseq [vs table] + (let [ks (sort (keys (dissoc vs :metric))) + pks (filterv #{0.25 0.5 0.75} ks) + oks (into [] (remove #{0.25 0.5 0.75}) ks)] + (println + (format "%22s Quantiles: %s" + (:metric vs) + (str/join ", " (mapv #(str % " " (vs %)) pks)))) + (doseq [ok oks] + (println (format "%32s %3.3g %s" "" ok (vs ok)))))))) + +(defmethod view/os* :print + [_ _ _sampled] + (let [ks [:arch :name :version :available-processors]] + (apply println + (-> (map + #(%1 (jvm/os-details)) + ks) + vec (conj "cpu(s)"))))) + +(defmethod view/runtime* :print + [_ _ _sampled] + (let [runtime-details (jvm/runtime-details)] + (apply println (map #(%1 runtime-details) [:vm-name :vm-version])) + (apply println "Runtime arguments:" + (:input-arguments runtime-details)))) + +(defmethod view/sample-percentiles* :print + [_ _view _sampled] + ;; TODO + ) diff --git a/bases/criterium/src/data_readers.clj b/bases/criterium/src/data_readers.clj new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/bases/criterium/src/data_readers.clj @@ -0,0 +1 @@ +{} diff --git a/bases/criterium/test/criterium/analyse/digest_samples_test.clj b/bases/criterium/test/criterium/analyse/digest_samples_test.clj new file mode 100644 index 0000000..db6230e --- /dev/null +++ b/bases/criterium/test/criterium/analyse/digest_samples_test.clj @@ -0,0 +1,246 @@ +(ns criterium.analyse.digest-samples-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.analyse :as analyse] + [criterium.analyse.digest-samples] + [criterium.collect-plan :as collect-plan] + [criterium.collector.metrics :as metrics] + [criterium.test-utils :refer [approx=]] + [criterium.types :as types] + [criterium.util.helpers :as util] + [criterium.util.invariant :refer [have?]] + [criterium.util.t-digest :as t-digest])) + +(defn digest-samples + [data ^long batch-size] + {:post [(have? types/digest-samples-map? 
%)]} + (let [n (count (first (vals data)))] + {:type :criterium/digest + :metric->digest (reduce-kv + (fn [res p values] + (assoc + res p + (t-digest/compress + (reduce + t-digest/add-point + (t-digest/new-digest) + values)))) + {} + data) + :transform (if (= batch-size 1) + collect-plan/identity-transforms + (#'collect-plan/batch-transforms batch-size)) + :num-samples n + :batch-size batch-size + :eval-count (* n batch-size) + :metrics-defs (select-keys + (metrics/metrics) + (mapv first (keys data))) + :source-id nil + :expr-value (ffirst (vals data))})) + +(defn transformed-value-fn + [data-map id] + (let [m (-> data-map id) + transforms (util/get-transforms data-map id)] + #(util/transform-sample-> % transforms))) + +(defn transformed-digest-values + [data-map id p] + (let [m (-> data-map id) + digest (get (:metric->digest m) p) + tform (transformed-value-fn data-map id)] + (-> digest + (update :minimum tform) + (update :maximum tform) + (update :centroids #(mapv (fn [c] (update c :mean tform)) %))))) + +(deftest digest-transform-log-test + (testing "transform-log" + (let [raw-data [(Math/exp 1) (Math/exp 2) (Math/exp 3)] + + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/transform-log) data-map) + digest (-> result + :log-samples + :metric->digest + (get [:elapsed-time]))] + (testing "puts the log transformed metrics into the result-path" + (is (= [1.0 2.0 3.0] + (mapv :mean (:centroids digest))))) + (testing "does not change original samples" + (is (= samples (:samples result)))) + (testing "adds transforms for the values" + (is (approx= + (mapv (fn [^double v] (/ v 10.0)) raw-data) + (mapv + :mean + (:centroids + (transformed-digest-values + result + :log-samples + [:elapsed-time]))))) + (is (approx= 1.0 (t-digest/minimum digest))) + (is (approx= 3.0 (t-digest/maximum digest)))) + (testing "doesn't transform event-metrics " + (is (not (contains? (:log-samples result) [:compilation]))))))) + +(deftest digest-quantiles-test + (testing "quantiles" + (let [raw-data [10 20 30] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/quantiles {:quantiles [0.025 0.975]}) + data-map) + tform (transformed-value-fn data-map :samples)] + (testing "puts the quantiles into the result-path" + (let [qs [0.25 0.5 0.75 0.025 0.975] + vs (-> result :quantiles util/quantiles :elapsed-time)] + (is (approx= [10 20 30 10 30] (mapv vs qs))) + (is (approx= + [1.0 2.0 3.0 1.0 3.0] + (mapv + tform + (-> result :quantiles util/quantiles :elapsed-time vals)))))) + (testing "doesn't transform event-metrics " + (is (every? + #(not (contains? 
% :compilation)) + (->> result :quantiles))) + (is (= [:elapsed-time] + (->> result :quantiles util/quantiles keys))))))) + +(deftest digest-outliers-test + (testing "Outliers" + (let [raw-data [9 10 9 10 9 10 10000] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + quantiles (analyse/quantiles {:quantiles []}) + outliers (analyse/outliers)] + (is (= {:low-severe 0, :low-mild 0, :high-mild 0, :high-severe 1} + (-> data-map + quantiles + outliers + :outliers + util/outliers + :elapsed-time + :outlier-counts)))))) + +(deftest digest-stats-test + (testing "stats" + (let [raw-data [1 2 3] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/stats) data-map)] + (testing "puts the stats into the result-path" + (is (= {:min-val 1.0, + :max-val 3.0, + :mean 2.0, + :mean-plus-3sigma 5.0, + :variance 1.0, + :mean-minus-3sigma -1.0 + :n 3} + (->> result :stats util/stats :elapsed-time)))) + (testing "doesn't transform event-metrics " + (is (every? + #(not (contains? % :compilation)) + (->> result :stats))) + (is (= [:elapsed-time] + (->> result :stats util/stats keys)))))) + (testing "stats variance" + (let [raw-data [1 1 1 5 5 5 9 9 9] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 1) + data-map {:samples samples} + result ((analyse/stats) data-map)] + (testing "calculates sample variance" + (is (= 12.0 (:variance (->> result :stats util/stats :elapsed-time)))))) + (let [raw-data [1 1 1 5 5 5 9 9 9] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/stats) data-map)] + (testing "scales with batch size" + (let [v (:variance (->> result :stats util/stats :elapsed-time))] + (is (= 12.0 v)) + (is (= 1.20 (util/transform-sample-> + v + (util/get-transforms result :stats)))))))) + (testing "excludes outliers" + (let [raw-data [9 10 9 10 9 10 10000] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 1) + data-map {:samples samples} + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + outliers (analyse/outliers) + stats (analyse/stats) + result (-> data-map + quantiles + outliers + stats) + smap (->> result :stats util/stats :elapsed-time) + smap' (util/transform-vals-> + (->> result :stats util/stats :elapsed-time) + (util/get-transforms result :stats))] + (testing "calculates sample variance" + (is (approx= 9.5 (:mean smap))) + (is (approx= 0.3 (:variance smap))) + (is (approx= 9 (:min-val smap))) + (is (approx= 10 (:max-val smap))) + (is (approx= 11.14316767 (:mean-plus-3sigma smap))) + (is (approx= 7.8568323274845016 (:mean-minus-3sigma smap))) + (is (= 6 (:n smap))) + + (is (approx= 9.5 (:mean smap'))) + (is (approx= 0.3 (:variance smap'))) + (is (approx= 9 (:min-val smap'))) + (is (approx= 10 (:max-val smap'))) + (is (approx= 11.14316767 (:mean-plus-3sigma smap'))) + (is (approx= 7.8568323274845016 (:mean-minus-3sigma smap'))))) + + (testing "scales with batch size" + (let [raw-data [9 10 9 10 9 10 10000] + samples (digest-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 2) + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + data-map {:samples samples} + outliers (analyse/outliers) + stats (analyse/stats) + result (-> data-map quantiles outliers stats) + smap (-> result :stats util/stats :elapsed-time) + smap' 
(util/transform-vals->
+                   (-> result :stats util/stats :elapsed-time)
+                   (util/get-transforms result :stats))]
+        (is (approx= 9.5 (:mean smap)))
+        (is (approx= 0.3 (:variance smap)))
+        (is (approx= 9 (:min-val smap)))
+        (is (approx= 10 (:max-val smap)))
+        (is (approx= 11.14316767 (:mean-plus-3sigma smap)))
+        (is (approx= 7.8568323274845016 (:mean-minus-3sigma smap)))
+
+        (is (approx= (/ 9.5 2.0) (:mean smap')))
+        (is (approx= (/ 0.3 2.0) (:variance smap')))
+        (is (approx= 4.5 (:min-val smap')))
+        (is (approx= 5 (:max-val smap')))
+        (is (approx= (/ 11.14316767 2) (:mean-plus-3sigma smap')))
+        (is (approx= (/ 7.8568323274845016 2) (:mean-minus-3sigma smap'))))))) diff --git a/bases/criterium/test/criterium/analyse_test.clj b/bases/criterium/test/criterium/analyse_test.clj
new file mode 100644
index 0000000..7b10e95
--- /dev/null
+++ b/bases/criterium/test/criterium/analyse_test.clj
@@ -0,0 +1,346 @@
+(ns criterium.analyse-test
+  (:require
+   [clojure.test :refer [deftest is testing]]
+   [criterium.analyse :as analyse]
+   [criterium.benchmark :as benchmark]
+   [criterium.collect-plan :as collect-plan]
+   [criterium.collector.metrics :as metrics]
+   [criterium.test-utils :refer [approx=]]
+   [criterium.types :as types]
+   [criterium.util.helpers :as util]
+   [criterium.util.invariant :refer [have?]]))
+
+(deftest outlier-significance-impl--test
+  ;; http://www.ellipticgroup.com/misc/article_supplement.pdf, p22
+  (testing "Outlier significance"
+    (let [batch-size 67108864]
+      (is (= 0.9960022873987793
+             (analyse/outlier-significance*
+              (/ 1.395522860870968 batch-size)
+              (/ (* 0.0013859776344426547 0.0013859776344426547)
+                 batch-size)
+              batch-size))))))
+
+(defn metrics-samples
+  [data ^long batch-size]
+  {:post [(have? types/metrics-samples-map? %)]}
+  (let [n (count (first (vals data)))]
+    {:type :criterium/metrics-samples
+     :metric->values data
+     :transform (if (= batch-size 1)
+                  collect-plan/identity-transforms
+                  (#'collect-plan/batch-transforms batch-size))
+     :num-samples n
+     :batch-size batch-size
+     :eval-count (* n batch-size)
+     :metrics-defs (select-keys
+                    (metrics/metrics)
+                    (mapv first (keys data)))
+     :source-id nil
+     :expr-value (ffirst (vals data))}))
+
+(defn transformed-metric-values
+  [data-map id p]
+  (let [m (-> data-map id)
+        transforms (util/get-transforms data-map id)]
+    (mapv
+     #(util/transform-sample-> % transforms)
+     (get (:metric->values m) p))))
+
+(defn transformed-values
+  [data-map id vs]
+  (let [transforms (util/get-transforms data-map id)]
+    (prn :transforms transforms)
+    (mapv
+     #(util/transform-sample-> % transforms)
+     vs)))
+
+(deftest transform-log-test
+  (testing "transform-log"
+    (let [raw-data [(Math/exp 1) (Math/exp 2) (Math/exp 3)]
+          samples (metrics-samples
+                   {[:elapsed-time] raw-data
+                    [:compilation :time-ms] [0 0 0]}
+                   10)
+          data-map {:samples samples}
+          result ((analyse/transform-log) data-map)]
+      (testing "puts the log transformed metrics into the result-path"
+        (is (= [1.0 2.0 3.0]
+               (-> result
+                   :log-samples
+                   :metric->values
+                   (get [:elapsed-time])))))
+      (testing "does not change original samples"
+        (is (= samples (:samples result))))
+      (testing "adds transforms for the values"
+        (is (approx=
+             (mapv (fn [^double v] (/ v 10.0)) raw-data)
+             (transformed-metric-values result :log-samples [:elapsed-time]))))
+      (testing "doesn't transform event-metrics "
+        (is (not (contains?
(:log-samples result) [:compilation]))))))) + +(deftest quantiles-test + (testing "quantiles" + (let [raw-data [10 20 30] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/quantiles {:quantiles [0.025 0.975]}) + data-map)] + (testing "puts the quantiles into the result-path" + (let [qs [0.25 0.5 0.75 0.025 0.975] + vs (-> result :quantiles util/quantiles :elapsed-time)] + (is (approx= + [15 20 25 10.5 29.5] + (mapv vs qs))) + (is (approx= + [1.5 2.0 2.5 1.05 2.95] + (transformed-values result :quantiles (mapv vs qs)))))) + (testing "doesn't transform event-metrics " + (is (every? + #(not (contains? % :compilation)) + (->> result :quantiles))) + (is (= [:elapsed-time] + (->> result :quantiles util/quantiles keys))))))) + +(deftest outliers-test + ;; http://www.ellipticgroup.com/misc/article_supplement.pdf, p22 + (testing "Outliers" + (let [raw-data [9 10 9 10 9 10 10000] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + quantiles (analyse/quantiles {:quantiles []}) + outliers (analyse/outliers)] + (is (= {:low-severe 0, :low-mild 0, :high-mild 0, :high-severe 1} + (-> data-map + quantiles + outliers + :outliers + util/outliers + :elapsed-time + :outlier-counts)))))) + +(deftest outlier-counts-test + ;; http://www.ellipticgroup.com/misc/article_supplement.pdf, p22 + (testing "Outlier counts" + (let [data-map + {:samples + {:type :criterium/collected-metrics-samples + :metric->values {[:elapsed-time] [1 1 1 1000]} + :transform collect-plan/identity-transforms + :batch-size 1 + :eval-count 4 + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time])}} + analyse (benchmark/->analyse + [[:quantiles {:quantiles [0.025 0.975]}] + :outliers])] + (is (= {:low-severe 0, :low-mild 0, :high-mild 0, :high-severe 1} + (-> (analyse data-map) + :outliers + util/outliers + :elapsed-time + :outlier-counts)))))) + +(deftest stats-test + (testing "stats" + (let [raw-data [1 2 3] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/stats) data-map)] + (testing "puts the stats into the result-path" + (is (= {:min-val 1.0, + :max-val 3.0, + :mean 2.0, + :mean-plus-3sigma 5.0, + :variance 1.0, + :mean-minus-3sigma -1.0 + :n 3} + (->> result :stats util/stats :elapsed-time)))) + (testing "doesn't transform event-metrics " + (is (every? + #(not (contains? 
% :compilation)) + (->> result :stats))) + (is (= [:elapsed-time] + (->> result :stats util/stats keys)))))) + (testing "stats variance" + (let [raw-data [1 1 1 5 5 5 9 9 9] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 1) + data-map {:samples samples} + result ((analyse/stats) data-map)] + (testing "calculates sample variance" + (is (= 12.0 (:variance (->> result :stats util/stats :elapsed-time)))))) + (let [raw-data [1 1 1 5 5 5 9 9 9] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 10) + data-map {:samples samples} + result ((analyse/stats) data-map)] + (testing "scales with batch size" + (let [v (:variance (->> result :stats util/stats :elapsed-time))] + (is (= 12.0 v)) + (is (= 1.20 (util/transform-sample-> + v + (util/get-transforms result :stats)))))))) + (testing "excludes outliers" + (let [raw-data [9 10 9 10 9 10 10000] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 1) + data-map {:samples samples} + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + outliers (analyse/outliers) + stats (analyse/stats) + result (-> data-map + quantiles + outliers + stats) + smap (->> result :stats util/stats :elapsed-time) + smap' (util/transform-vals-> + (->> result :stats util/stats :elapsed-time) + (util/get-transforms result :stats))] + (testing "calculates sample variance" + (is (approx= 9.5 (:mean smap))) + (is (approx= 0.3 (:variance smap))) + (is (approx= 9 (:min-val smap))) + (is (approx= 10 (:max-val smap))) + (is (approx= 11.14316767 (:mean-plus-3sigma smap))) + (is (approx= 7.8568323274845016 (:mean-minus-3sigma smap))) + (is (= 6 (:n smap))) + + (is (approx= 9.5 (:mean smap'))) + (is (approx= 0.3 (:variance smap'))) + (is (approx= 9 (:min-val smap'))) + (is (approx= 10 (:max-val smap'))) + (is (approx= 11.14316767 (:mean-plus-3sigma smap'))) + (is (approx= 7.8568323274845016 (:mean-minus-3sigma smap'))))) + + (testing "scales with batch size" + (let [raw-data [9 10 9 10 9 10 10000] + samples (metrics-samples + {[:elapsed-time] raw-data + [:compilation :time-ms] [0 0 0]} + 2) + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + data-map {:samples samples} + outliers (analyse/outliers) + stats (analyse/stats) + result (-> data-map quantiles outliers stats) + smap (-> result :stats util/stats :elapsed-time) + smap' (util/transform-vals-> + (-> result :stats util/stats :elapsed-time) + (util/get-transforms result :stats))] + (is (approx= 9.5 (:mean smap))) + (is (approx= 0.3 (:variance smap))) + (is (approx= 9 (:min-val smap))) + (is (approx= 10 (:max-val smap))) + (is (approx= 11.14316767 (:mean-plus-3sigma smap))) + (is (approx= 7.8568323274845016 (:mean-minus-3sigma smap))) + + (is (approx= (/ 9.5 2.0) (:mean smap'))) + (is (approx= (/ 0.3 2.0) (:variance smap'))) + (is (approx= 4.5 (:min-val smap'))) + (is (approx= 5 (:max-val smap'))) + (is (approx= (/ 11.14316767 2) (:mean-plus-3sigma smap'))) + (is (approx= (/ 7.8568323274845016 2) (:mean-minus-3sigma smap'))))))) + +(deftest event-stats-test + (testing "event-stats" + (let [data-map + {:samples + {:type :criterium/metrics-samples + :metrics-defs + (-> (select-keys + (metrics/metrics) + [:class-loader :compilation + :elapsed-time :garbage-collector]) + (assoc-in + [:garbage-collector] + {:type :event + :groups + {:total + {:summary + (str "%32s: ran %s times" + " for a total of %s in %s samples") + :values + [{:path [:garbage-collector :total :count] + :scale 1 + :dimension 
:count + :label "GC total count" + :type :event} + {:path [:garbage-collector :total :time-ms] + :scale 1e-3 + :dimension :time + :label "GC total time" + :type :event}] + :label "Garbage Collector"}}})) + :metric->values {[:elapsed-time] [1 2 3] + [:compilation :time-ms] [3 5 0] + [:garbage-collector :total :time-ms] [1 1 1] + [:garbage-collector :total :count] [2 1 1] + [:class-loader :loaded-count] [2 2 0] + [:class-loader :unloaded-count] [0 0 0]} + :batch-size 1 + :eval-count 3}} + result ((analyse/event-stats) data-map)] + (testing "puts the event-stats into the output-path" + (is (= {[:class-loader :loaded-count] 4, + [:class-loader :unloaded-count] 0, + [:class-loader :sample-count] 2, + [:compilation :time-ms] 8, + [:compilation :sample-count] 2, + [:garbage-collector :total :count] 4, + [:garbage-collector :total :time-ms] 3, + [:garbage-collector :total :sample-count] 3} + #_{:compilation {:time-ms 8 :sample-count 2} + :garbage-collector {:total + {:time-ms 3 :count 4 :sample-count 3}} + :class-loader {:sample-count 2 + :loaded-count 4 :unloaded-count 0}} + (->> result :event-stats util/event-stats))))))) + +(deftest outlier-effect-test + (is (= :unaffected (analyse/outlier-effect 0.009))) + (is (= :slight (analyse/outlier-effect 0.09))) + (is (= :moderate (analyse/outlier-effect 0.49))) + (is (= :severe (analyse/outlier-effect 0.51)))) + +(deftest outlier-significance-test + ;; http://www.ellipticgroup.com/misc/article_supplement.pdf, p22 + (testing "Outlier counts" + (let [data-map + {:samples + {:type :criterium/collected-metrics-samples + :metric->values {[:elapsed-time] [1 1 1 1000]} + :transform collect-plan/identity-transforms + :batch-size 1 + :eval-count 4 + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time])}} + analyse (benchmark/->analyse + [[:quantiles {:quantiles [0.025 0.975]}] + :outliers + :stats + :outlier-significance])] + + (is (= {:significance 0 + :effect :unaffected} + (-> (analyse data-map) + :outlier-significance + util/outlier-significance + :elapsed-time)))))) diff --git a/bases/criterium/test/criterium/bench/config_test.clj b/bases/criterium/test/criterium/bench/config_test.clj new file mode 100644 index 0000000..ac4f378 --- /dev/null +++ b/bases/criterium/test/criterium/bench/config_test.clj @@ -0,0 +1,60 @@ +(ns criterium.bench.config-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.bench-plans :as bench-plans] + [criterium.bench.config :as bench-config] + [criterium.collect-plan.config :as collect-plan-config] + [criterium.collector :as collector] + [criterium.collector-configs :as collector-configs] + [criterium.measured :as measured])) + +(deftest config-map-test + (let [measured (measured/expr 1)] + (measured/invoke measured (measured/args measured) 1) + (testing "config-map provides defaults" + (is (= (merge + bench-plans/default-with-warmup + {:collector-config + (->> + collector-configs/default-collector-config + (collect-plan-config/ensure-pipeline-stages + :with-jit-warmup)) + :collect-plan + (collect-plan-config/collect-plan-config + :with-jit-warmup {}) + :return-value [:samples :expr-value]}) + (bench-config/config-map {})))) + (testing "config-map can specify the pipeline stages" + (is (= (-> (merge + bench-plans/default-with-warmup + {:collector-config + (->> + {:stages [:class-loader + :compilation + :garbage-collector + :measured-args] + :terminator :elapsed-time} + (collect-plan-config/ensure-pipeline-stages + :with-jit-warmup)) + :collect-plan + (collect-plan-config/collect-plan-config + 
:with-jit-warmup {}) + :return-value [:samples :expr-value]})) + (bench-config/config-map + {:metric-ids [:class-loader + :compilation + :garbage-collector]})))) + (testing "config-map can specify the sample scheme" + (is (= (-> (merge + bench-plans/default-one-shot + {:collector-config + (->> + {:stages [] + :terminator :elapsed-time} + (collect-plan-config/ensure-pipeline-stages + :one-shot)) + :collect-plan + (collect-plan-config/collect-plan-config + :one-shot {}) + :return-value [:samples :expr-value]})) + (bench-config/config-map {:collect-plan :one-shot})))))) diff --git a/bases/criterium/test/criterium/bench_test.clj b/bases/criterium/test/criterium/bench_test.clj new file mode 100644 index 0000000..b550415 --- /dev/null +++ b/bases/criterium/test/criterium/bench_test.clj @@ -0,0 +1,42 @@ +(ns criterium.bench-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.bench :as bench] + [criterium.bench.impl :as bench-impl])) + +(deftest bench-test + (testing "bench" + (bench-impl/last-bench! nil) + (is (nil? (bench/last-bench))) + (let [out (with-out-str (bench/bench 1))] + (testing "outputs the estimated time on stdout" + (is (re-find + #"Elapsed Time: [0-9.]+ [mn]s 3σ \[[0-9.-]+ [0-9.]+] min [0-9.]+" + out))))) + (testing "time with stats" + (let [out (with-out-str (bench/bench 1 :limit-time-s 0.1))] + (testing "outputs statistics on stdout" + (is (re-find #"3σ" out))))) + (testing "time with one-shot" + (let [out (with-out-str (bench/bench 1 :collect-plan :one-shot))] + (testing "outputs statistics on stdout" + (is (not (re-find #"±" out)))))) + (testing "time returns expression-value" + (with-out-str + (let [v (bench/bench 1)] + (is (= 1 v))))) + + (testing "all pipelines" + (with-out-str + (let [v (bench/bench + 1 + :limit-time-s 0.1 + :metric-ids [:elapsed-time + :memory + :thread-allocation + :garbage-collector + :finalization + :compilation + :measured-args + :class-loader])] + (is (= 1 v)))))) diff --git a/bases/criterium/test/criterium/collect_plan/impl_test.clj b/bases/criterium/test/criterium/collect_plan/impl_test.clj new file mode 100644 index 0000000..be08a70 --- /dev/null +++ b/bases/criterium/test/criterium/collect_plan/impl_test.clj @@ -0,0 +1,30 @@ +(ns criterium.collect-plan.impl-test + (:require + [clojure.string :as str] + [clojure.test :refer [deftest is]] + [criterium.collect-plan.impl :as impl])) + +(deftest limit-samples-test + (let [s (with-out-str + (let [[nw nm] (impl/limit-samples + 10000 + 1000 100 + 0 + 10000 + 0)] + (is (= 1000 nw)) + (is (= 100 nm))))] + (is (= "" s))) + (let [s (with-out-str + (let [[nw nm] (impl/limit-samples + 10000 + 1000 100 + 5000 + 5000 + 10000)] + (is (= 500 nw)) + (is (= 50 nm))))] + (is (str/includes? s "time required for full JIT is 1.50e-05s")) + (is (str/includes? s "limited to 1.00e-05s")) + (is (str/includes? s "pass `:limit-time-s 1.50e-05` to improve accuracy")) + (is (str/includes? 
s "consider benchmarks at a lower level")))) diff --git a/bases/criterium/test/criterium/collect_plan_test.clj b/bases/criterium/test/criterium/collect_plan_test.clj new file mode 100644 index 0000000..5fe2b95 --- /dev/null +++ b/bases/criterium/test/criterium/collect_plan_test.clj @@ -0,0 +1,51 @@ +(ns criterium.collect-plan-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.collect-plan :as collect-plan] + [criterium.collect-plan.config :as collect-plan-config] + [criterium.collector :as collector] + [criterium.measured :as measured] + [criterium.types :as types])) + +(deftest one-shot-test + (testing "one-shot" + (let [measured (measured/measured + (fn [] []) + (fn [_ _] [1000000 1])) + collector (collector/collector + {:stages [:compilation :memory] + :terminator :elapsed-time}) + data-map (collect-plan/collect + (collect-plan-config/collect-plan-config + :one-shot + {}) + collector + measured)] + (is (map? data-map)) + (is (types/collected-metrics-map? (:samples data-map))) + (is (vector? ((:metric->values (:samples data-map)) [:elapsed-time]))) + (is (= 1 + (count ((:metric->values (:samples data-map)) [:elapsed-time])))) + (is (every? vector? (vals (:metric->values (:samples data-map))))) + (is (= 1 (:expr-value (:samples data-map))))))) + +(deftest full-test + (testing "full sampling" + (let [measured (measured/measured + (fn [] []) + (fn [_ _] [1000000 1])) + collector (collector/collector {:stages [:compilation :memory] + :terminator :elapsed-time}) + data-map (collect-plan/collect + (collect-plan-config/collect-plan-config + :with-jit-warmup + {}) + collector + measured)] + (is (map? data-map)) + (is (types/collected-metrics-map? (:samples data-map))) + (is (vector? ((:metric->values (:samples data-map)) [:elapsed-time]))) + (is (<= 10 + (count ((:metric->values (:samples data-map)) [:elapsed-time])))) + (is (every? vector? (vals (:metric->values (:samples data-map))))) + (is (= 1 (:expr-value (:samples data-map))))))) diff --git a/bases/criterium/test/criterium/collect_test.clj b/bases/criterium/test/criterium/collect_test.clj new file mode 100644 index 0000000..73e4217 --- /dev/null +++ b/bases/criterium/test/criterium/collect_test.clj @@ -0,0 +1,38 @@ +(ns criterium.collect-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.agent :as agent] + [criterium.collect :as collect] + [criterium.collector :as collector] + [criterium.measured :as measured])) + +(deftest full-zero-garbage-test + (testing "full sampling" + (let [measured (measured/measured + (fn [] [:b]) + (fn [_ _] [10000000 1])) + collector (collector/collector + {:stages [:garbage-collector :compilation] + :terminator :elapsed-time}) + ;; run collector for JIT + _ (dotimes [_ 10000] + (collect/collect-arrays + collector + measured + 1 + 10)) + [allocations sampled] (agent/with-allocation-tracing + (collect/collect-arrays + collector + measured + 1 + 10)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (= 10 (alength ^objects (:samples sampled)))) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? 
sampled) "hold onto samples reference until this point")))) diff --git a/bases/criterium/test/criterium/collector/fns_test.clj b/bases/criterium/test/criterium/collector/fns_test.clj new file mode 100644 index 0000000..c17708e --- /dev/null +++ b/bases/criterium/test/criterium/collector/fns_test.clj @@ -0,0 +1,225 @@ +(ns criterium.collector.fns-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.agent :as agent] + [criterium.collector :as collector] + [criterium.measured :as measured])) + +;; (defn prim-f ^long [^long l] +;; l) + +#_(deftest expression-garbage-test + (let [[allocations res] (agent/with-allocation-tracing + {:a "a"}) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (is (some? res)) + (is (zero? freed-bytes) + (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations)))) + (let [[allocations res] (agent/with-allocation-tracing + {:a "a" + :b "b"}) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (is (some? res)) + (is (zero? freed-bytes) + (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations)))) + (let [[allocations res] (agent/with-allocation-tracing + [:a :b]) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (is (some? res)) + (is (zero? freed-bytes) + (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations)))) + (let [[allocations res] (agent/with-allocation-tracing + (criterium.jvm/class-loader-counts)) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (is (some? res)) + (is (zero? freed-bytes) + (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations)))) + (let [[allocations res] (agent/with-allocation-tracing + [(criterium.jvm/class-loader-counts) + (criterium.jvm/class-loader-counts)]) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (is (some? res)) + (is (zero? freed-bytes) + (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations)))) + (let [f prim-f + [allocations res] (agent/with-allocation-tracing + (* (+ (f 1) + (f 2)) + (f 3))) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (tap> (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations))) + (is (zero? freed-bytes) + (filterv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations))) + (is (= 9 res)))) + +(deftest zero-garbage-test + (testing "Sampling is zero garbage" + (let [measured (measured/measured + (fn [] []) + (fn [_ _] [1 1])) + measured measured + state []] + (testing "for elapsed-time-metric" + (let [sample (make-array Object 1) + p (collector/collector + {:stages [] :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (= [1 1] res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? 
sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for measured-args" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:measured-args] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for class-loader" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:class-loader] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for compilation" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:compilation] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for memory" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:memory] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for finalization" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:finalization] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? 
sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for garbage-collector" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:garbage-collector] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? sample) + "Make sure sample isn't collected until after garbage check"))) + (testing "for thread-allocation" + (let [sample (make-array Object 2) + p (collector/collector + {:stages [:thread-allocation] + :terminator :elapsed-time}) + _ (dotimes [_ 10] + ((:f p) sample measured state 1 0)) + [allocations res] (agent/with-allocation-tracing + ((:f p) sample measured state 1 0)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (nil? res)) + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))) + (is (some? sample) + "Make sure sample isn't collected until after garbage check")))))) diff --git a/bases/criterium/test/criterium/collector/metrics_test.clj b/bases/criterium/test/criterium/collector/metrics_test.clj new file mode 100644 index 0000000..a3c82c6 --- /dev/null +++ b/bases/criterium/test/criterium/collector/metrics_test.clj @@ -0,0 +1,10 @@ +(ns criterium.collector.metrics-test + (:require + [clojure.test :refer [deftest is]] + [criterium.collector.metrics :as metrics] + [criterium.metric :as metric])) + +(deftest metrics-test + (is (every? + metric/metric-config? + (metric/all-metric-configs (metrics/metrics))))) diff --git a/bases/criterium/test/criterium/collector_test.clj b/bases/criterium/test/criterium/collector_test.clj new file mode 100644 index 0000000..0f87c12 --- /dev/null +++ b/bases/criterium/test/criterium/collector_test.clj @@ -0,0 +1,116 @@ +(ns criterium.collector-test + (:require + [clojure.set :as set] + [clojure.spec.test.alpha] + [clojure.test :refer [deftest is testing]] + [criterium.collector :as collector] + [criterium.collector.impl :as collector-impl] + [criterium.measured :as measured])) + +(def m-value 12345) +(def m (measured/measured + (fn args-f [] ::args) + (fn measured-f [args _n] + [m-value [args args]]) + nil)) + +(def base-keys #{:expr-value :elapsed-time}) + +(defn run-collector + [collector-config measured] + (let [collector (collector/collector collector-config) + sample (collector/collect-array + collector + measured + (measured/args measured) + 1)] + (is (= (:length collector) (alength sample))) + (collector/transform collector sample))) + +(deftest execute-test + (testing "Execute a measured with time-metric" + (let [res (run-collector {:stages [] :terminator :elapsed-time} m)] + (is (map? 
res) res) + (testing "Has the measured time on the :elapsed-time key" + (is (= m-value (:elapsed-time res)))) + (testing "Has the evaluation count on the :eval-count key" + (is (= m-value (:elapsed-time res)))) + (testing "Has the measured expression value on the :expr-value key" + (is (= m-value (:elapsed-time res))))))) + +(deftest with-measured-args-test + (testing "Execute a measured with measured-args" + (let [res (run-collector + {:stages [:measured-args] :terminator :elapsed-time} + m)] + (testing "Has the measured state on the :state key" + (is (= ::args (:args res))) + (is (= m-value (:elapsed-time res))))))) + +(defn- all-stages [] + (->> 'criterium.collector.fns + ns-publics + vals + (mapv var-get) + (remove collector/terminal?) + (filterv collector/stage?))) + +(defn- all-terminators [] + (->> 'criterium.collector.fns + ns-publics + vals + (mapv var-get) + (filterv collector/terminal?))) + +(deftest pipeline-fns-test + (doseq [stage (all-stages)] + (testing (str "Pipeline function " (:id stage)) + (let [res (run-collector {:stages [stage] :terminator :elapsed-time} m) + ks (set (keys res))] + (is (= base-keys (set/intersection base-keys ks))))))) + +(deftest pipeline*-test + (testing "pipeline-fn*" + (testing "builds a pipeline" + (is (fn? (collector-impl/pipeline-sample-fn + {:stages (all-stages) + :terminator (first (all-terminators))})))) + (testing "throws if passed a non keyword" + (is (thrown? clojure.lang.ExceptionInfo + (collector-impl/pipeline-sample-fn + {:stages [::unknown] + :terminator (first (all-terminators))})))) + (testing "throws if passed an unknown terminal function" + (is (thrown? clojure.lang.ExceptionInfo + (collector-impl/pipeline-sample-fn + {:stages (all-stages) + :terminator ::unknown})))))) + +(deftest collector-test + (testing "collector" + (testing "builds a pipeline" + (let [collector (collector/collector + {:stages (all-stages) + :terminator :elapsed-time})] + (is (map? collector)) + (is (contains? collector :metrics-defs)) + (is (fn? (-> collector :f))) + (is (fn? (-> collector :x))))) + (testing "throws if passed a non keyword" + (is (thrown? clojure.lang.ExceptionInfo + {:config + {:collector-config + (collector/collector + {:config + {:collector-config + {:stages [::unknown] + :terminator :elapsed-time}}})}}))) + (testing "throws if passed an unknown terminal function" + (is (thrown? clojure.lang.ExceptionInfo + {:config + {:collector-config + (collector/collector + {:config + {:collector-config + {:stages (all-stages) + :terminator ::unknown}}})}}))))) diff --git a/bases/criterium/test/criterium/core_test.clj b/bases/criterium/test/criterium/core_test.clj new file mode 100644 index 0000000..efefb67 --- /dev/null +++ b/bases/criterium/test/criterium/core_test.clj @@ -0,0 +1,9 @@ +(ns criterium.core-test + (:require + [clojure.test :refer [deftest is]] + [criterium.core :as core])) + +(deftest bench-test + (let [s (with-out-str + (core/bench 1 :target-execution-time 1))] + (is s))) diff --git a/bases/criterium/test/criterium/data/air_quality.clj b/bases/criterium/test/criterium/data/air_quality.clj new file mode 100644 index 0000000..b139009 --- /dev/null +++ b/bases/criterium/test/criterium/data/air_quality.clj @@ -0,0 +1,18 @@ +(ns criterium.data.air-quality + "Mean ozone concentration (in parts per billion) in the air from 13:00 to + 15:00 hours at Roosevelt Island, New York City, from May 1, 1973 to September + 30, 1973. it was obtained from the New York State Department of Conservation. + + Source from airquality data set built into R. 
+ + data(airquality) + airquality[c(\"Ozone\")] + ") + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def ozone + [41 36 12 18 28 23 19 8 7 16 11 14 18 14 34 6 30 11 1 11 4 32 23 45 115 37 + 29 71 39 23 21 37 20 12 13 135 49 32 64 40 77 97 97 85 10 27 7 48 35 61 79 + 63 16 80 108 20 52 82 50 64 59 39 9 16 78 35 66 122 89 110 44 28 65 22 59 + 23 31 44 21 9 45 168 73 76 118 84 85 96 78 73 91 47 32 20 23 21 24 44 21 + 28 9 13 46 18 13 24 16 13 23 36 7 14 30 14 18 20]) diff --git a/bases/criterium/test/criterium/data/suicide.clj b/bases/criterium/test/criterium/data/suicide.clj new file mode 100644 index 0000000..0244a5a --- /dev/null +++ b/bases/criterium/test/criterium/data/suicide.clj @@ -0,0 +1,33 @@ +(ns criterium.data.suicide + "lengths (in days) of psychiatric treatment spells for patients + + Sourced from https://rdrr.io/cran/bde/man/suicide.r.html + + Silverman, B. (1986). Density Estimation for Statistics and Data + Analysis. Chapman & Hall + + Copas, J. B. and Fryer, M. J. (1980). Density estimation and suicide + risks in psychiatric treatment. Journal of the Royal Statistical + Society. Series A, 143(2), 167-176") + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def days-of-treatment + "The dataset comprises lengths (in days) of psychiatric treatment spells for + patients used as controls in a study of suicide risks. The data have been + scaled to the interval [0,1] by dividing each data sample by the maximum + value." + [0.001356852 0.001356852 0.001356852 0.006784261 0.009497965 0.010854817 + 0.010854817 0.017639077 0.018995929 0.018995929 0.023066486 0.024423338 + 0.028493894 0.028493894 0.029850746 0.033921303 0.036635007 0.036635007 + 0.040705563 0.040705563 0.042062415 0.042062415 0.043419267 0.046132972 + 0.047489824 0.048846676 0.050203528 0.051560380 0.052917232 0.052917232 + 0.054274084 0.066485753 0.066485753 0.073270014 0.075983718 0.075983718 + 0.084124830 0.085481682 0.088195387 0.088195387 0.090909091 0.101763908 + 0.103120760 0.107191316 0.111261872 0.112618725 0.113975577 0.113975577 + 0.113975577 0.122116689 0.123473541 0.124830393 0.126187246 0.126187246 + 0.139755767 0.139755767 0.150610583 0.151967436 0.161465400 0.165535957 + 0.166892809 0.170963365 0.175033921 0.181818182 0.195386703 0.199457259 + 0.207598372 0.221166893 0.226594301 0.237449118 0.309362280 0.313432836 + 0.318860244 0.328358209 0.347354138 0.347354138 0.348710991 0.421981004 + 0.426051560 0.436906377 0.500678426 0.563093623 0.777476255 0.826322931 + 0.868385346 1.000000000]) diff --git a/bases/criterium/test/criterium/data/wage2.clj b/bases/criterium/test/criterium/data/wage2.clj new file mode 100644 index 0000000..2a9aef0 --- /dev/null +++ b/bases/criterium/test/criterium/data/wage2.clj @@ -0,0 +1,58 @@ +(ns criterium.data.wage2 + "Introductory Econometrics: A Modern Approach, 6e by Jeffrey M. 
Wooldridge") + +;; install.packages("wooldridge") +;; library(wooldridge) +;; str(wage2) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(def wage2 + [769 808 825 650 562 1400 600 1081 1154 1000 930 921 900 1318 1792 958 1360 850 830 + 471 1275 1615 873 2137 1053 1602 1188 800 1417 635 1000 1424 2668 666 1779 782 1572 + 1274 714 1081 692 1318 1239 1027 1748 981 770 1154 1155 808 1100 1154 1749 1000 462 + 769 875 1375 1452 800 1748 1151 840 978 963 619 442 600 1366 1643 1455 2310 1682 1235 + 855 1072 1040 1000 675 1100 996 732 1200 1694 686 754 857 832 579 672 2500 1076 750 + 1186 833 650 1250 1122 865 808 1299 903 900 625 1586 962 1539 1110 1282 770 1000 895 + 1205 750 654 601 600 433 1188 635 1225 1151 865 1031 1049 1000 1105 1924 1346 809 + 1495 1346 1200 500 1325 900 800 800 1034 980 884 480 923 513 1105 1193 2771 779 950 + 1394 1495 650 670 1126 450 1028 2404 1899 757 1250 1162 1025 1100 714 1318 1411 2162 + 1273 1140 578 942 1058 750 1000 951 635 1250 675 400 577 590 923 1250 1100 1130 652 + 618 962 529 817 962 840 866 2404 1126 1160 723 1778 1903 1010 971 525 525 670 500 + 1058 550 500 727 865 1081 1304 575 623 515 1273 990 600 1160 500 795 500 740 1250 + 1014 1250 913 1346 445 265 1250 1607 1452 1391 821 794 500 520 1730 1924 1155 2162 + 923 1115 449 1500 826 937 978 1031 1272 1136 800 1339 1063 935 808 375 537 1082 930 + 1155 548 622 841 769 587 1924 1058 417 1202 1154 1070 1202 711 1202 850 1000 490 1000 + 865 1375 1586 1602 3078 898 906 952 571 445 289 1444 962 1075 909 1250 620 1016 800 + 1050 1079 654 781 1038 1924 1202 666 905 890 817 812 577 756 1011 1155 1025 1350 1001 + 796 1230 754 714 1000 2067 912 600 951 711 1151 1000 841 400 1175 1202 1442 538 781 + 750 841 700 1346 800 1250 1105 475 762 962 721 800 658 1270 1313 824 1442 1400 1038 + 668 1100 1000 523 1111 962 729 690 1010 600 596 850 670 793 1442 670 876 841 975 1223 + 910 533 750 1206 745 900 1170 540 550 615 909 769 984 833 879 1027 1000 465 1100 641 + 1035 1212 950 938 1250 586 693 562 375 673 654 692 1111 1368 1282 1250 1346 1424 854 + 888 1161 583 1260 947 1850 1575 758 1442 489 1126 1000 500 1200 565 1920 684 774 233 + 975 1366 2137 700 1200 1161 729 750 1026 1111 625 1200 1541 1154 310 610 1749 1000 + 350 765 790 818 477 938 2099 350 940 1202 450 1058 1000 318 556 958 995 1600 606 511 + 1411 1346 1522 1075 1200 1377 874 625 1250 1082 693 727 615 913 884 698 800 812 1000 + 549 1300 923 1539 721 815 600 1100 1058 962 433 940 1417 1000 500 800 766 750 550 795 + 723 1039 200 400 575 850 508 1162 1199 1270 350 963 1463 802 642 751 488 1400 940 866 + 675 375 325 346 1442 560 550 950 684 1322 1634 417 472 666 1679 1039 1250 760 1154 + 1602 1442 865 705 1000 852 770 1000 1329 1682 577 685 577 1040 1250 600 480 1000 988 + 478 440 485 705 1550 960 1000 625 1346 961 479 1201 1346 1122 543 450 507 547 586 462 + 705 1566 1634 1282 556 1200 855 1053 1000 900 1602 508 1500 1682 750 843 409 1004 650 + 808 2137 761 769 345 495 987 2500 1098 1212 577 390 1500 583 460 945 1442 1333 1500 + 1333 700 973 2162 797 400 1015 1744 630 445 660 779 377 560 1122 453 1386 1539 1154 + 962 722 480 808 1442 1091 350 500 1026 577 1333 915 1105 910 1000 1160 1001 713 929 + 400 1241 1065 403 940 812 700 575 450 621 441 625 726 500 1000 393 600 962 962 865 + 1154 1386 732 865 700 975 1300 900 829 1000 827 500 1155 950 700 1710 533 2004 890 + 3078 1539 508 1354 1143 962 1250 990 905 926 1559 1312 923 879 800 1049 550 1190 583 + 1200 797 1371 360 1270 1832 909 1746 520 808 2137 692 931 812 866 1442 500 1384 528 + 881 1026 1212 1620 1843 
1602 1000 737 1699 1699 1025 1107 625 720 855 1250 1130 900 + 1924 962 1874 1573 940 900 882 1710 1260 751 1097 1001 1154 1000 1250 857 1211 937 + 904 872 1200 1261 950 115 673 1084 1058 800 369 1924 855 864 1092 1025 850 673 800 + 1049 884 800 1200 664 1111 850 1000 875 1202 666 923 788 950 1442 866 625 680 693 875 + 2308 1196 700 721 1500 1050 801 640 1198 390 889 1076 1127 750 855 525 1104 553 800 + 1384 425 510 370 596 402 418 1133 681 950 600 661 925 550 606 425 340 692 575 571 817 + 987 616 1026 808 808 788 850 900 1418 260 662 562 562 357 1009 1442 651 750 754 700 + 503 937 624 750 900 540 642 400 900 513 894 1282 485 325 769 618 1040 751 380 300 753 + 1065 1070 1573 650 700 494 890 520 891 570 1444 481 500 1473 803 962 1000 600 450 629 + 492 1562 357 960 566 481 1442 645 788 644 477 664 520 1202 538 873 1000]) diff --git a/bases/criterium/test/criterium/instrument_fn_test.clj b/bases/criterium/test/criterium/instrument_fn_test.clj new file mode 100644 index 0000000..0ce67ae --- /dev/null +++ b/bases/criterium/test/criterium/instrument_fn_test.clj @@ -0,0 +1,98 @@ +(ns criterium.instrument-fn-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.collector :as collector] + [criterium.instrument-fn :as instrument-fn] + [criterium.jvm :as jvm] + [criterium.sampler :as sampler] + [criterium.types :as types])) + +(def ^:private seen (volatile! 0)) + +(defn- busy-wait + ([] (busy-wait 10)) + ([t-ns] + (let [t-ns (long t-ns) + t0 (jvm/timestamp)] + (vswap! seen (fn [i] (inc (long i)))) + (loop [] + (when (< (unchecked-subtract (jvm/timestamp) t0) t-ns) + (recur)))))) + +(deftest instrumented-fn-test + (testing "basic instrumentation" + (vreset! seen 0) + (let [conf {:stages [] + :terminator (collector/maybe-var-get-stage + :elapsed-time) + :sample-count 1} + inst-f (instrument-fn/instrument-fn busy-wait conf)] + + (is (satisfies? sampler/Sampler inst-f) + "implements Sampler") + (is (instance? clojure.lang.IFn inst-f) + "implements IFn") + (is (instance? Runnable inst-f) + "implements Runnable") + (is (instance? java.util.concurrent.Callable inst-f) + "implements Callable") + + (inst-f 1) + (is (= 1 @seen) "original function called") + (is (= 1 (count (-> (sampler/samples-map inst-f) + :metric->values + (get [:elapsed-time])))) + "sample collected") + + (sampler/reset-samples! inst-f) + (is (empty? (-> (sampler/samples-map inst-f) + :metric->values + (get [:elapsed-time]))) + "samples can be reset"))) + + (testing "function invocation" + (vreset! seen 0) + (let [conf {:stages [] + :terminator (collector/maybe-var-get-stage + :elapsed-time) + :sample-count 1} + inst-f (instrument-fn/instrument-fn busy-wait conf)] + + (.run ^Runnable inst-f) + (is (= 1 @seen) "Runnable.run works") + + (.call ^Callable inst-f) + (is (= 2 @seen) "Callable.call works") + + (inst-f 1) + (is (= 3 @seen) "direct invoke works") + + (apply inst-f [1]) + (is (= 4 @seen) "apply works") + + (is (= 4 (count (-> (sampler/samples-map inst-f) + :metric->values + (get [:elapsed-time])))) + "all invocations collected samples"))) + + (testing "samples can be analyzed" + (let [conf {:stages [:compilation + :garbage-collector] + :terminator :elapsed-time} + inst-f (instrument-fn/instrument-fn busy-wait conf) + start (jvm/timestamp) + _ (inst-f 1) + _ (inst-f 2) + finish (jvm/timestamp) + elapsed (unchecked-subtract finish start) + sample-m (sampler/samples-map inst-f)] + (is (types/metrics-samples-map? 
sample-m)) + (is (= 2 (count ((:metric->values sample-m) [:elapsed-time]))) + "samples returned") + (is (= (count ((:metric->values sample-m) [:elapsed-time])) + (:eval-count sample-m)) + "eval-count correct") + (is (= 1 (:batch-size sample-m)) "batch-size is correct") + (is (>= elapsed + (reduce + ((:metric->values sample-m) [:elapsed-time]))) + "elapsed time is sane")))) diff --git a/bases/criterium/test/criterium/instrument_test.clj b/bases/criterium/test/criterium/instrument_test.clj new file mode 100644 index 0000000..2927023 --- /dev/null +++ b/bases/criterium/test/criterium/instrument_test.clj @@ -0,0 +1,144 @@ +(ns criterium.instrument-test + (:require + [clojure.set :as set] + [clojure.test :refer [deftest is testing]] + [criterium.analyse :as analyse] + [criterium.collector :as collector] + [criterium.instrument :as instrument] + [criterium.jvm :as jvm] + [criterium.sampler :as sampler] + [criterium.types :as types] + [criterium.util.helpers :as util])) + +;; instrument's measured never have their `args-fn` called. +(def seen (volatile! 0)) + +(def original-f @#'instrument/original-f) + +(defn busy-wait + [t-ns] + (let [t-ns (long t-ns) + t0 (jvm/timestamp)] + (vswap! seen (fn [i] (inc (long i)))) + (loop [] + (when (< (unchecked-subtract (jvm/timestamp) t0) t-ns) + (recur))))) + +(deftest instrument!-test + (testing "basic instrumentation" + (let [v #'busy-wait + orig-f @v + conf {:stages [] + :terminator (collector/maybe-var-get-stage + :elapsed-time) + :sample-count 1}] + (vreset! seen 0) + (instrument/instrument! v conf) + (is (not= busy-wait v) "function is wrapped") + (is (= orig-f (#'instrument/original-f (meta v))) + "original function stored") + (is (types/metrics-samples-map? (sampler/samples-map @v)) + "samples atom added") + + ;; Test idempotency + (instrument/instrument! v conf) + (is (= @v @v) "second instrumentation is idempotent") + + (busy-wait 1) + (is (= 1 @seen) "original function called") + (is (= 1 (count + (-> @v + (sampler/samples-map) + :metric->values + (get [:elapsed-time])))) + "sample collected") + + (instrument/uninstrument! v))) + + (testing "instrumentation preserves metadata" + (let [v #'busy-wait + conf {:stages [] + :terminator (collector/maybe-var-get-stage + :elapsed-time) + :sample-count 1}] + (alter-meta! v assoc :test-key :test-value) + (instrument/instrument! v conf) + (is (= :test-value (:test-key (meta v))) "preserves existing metadata") + (instrument/uninstrument! v)))) + +(deftest uninstrument!-test + (testing "basic uninstrumentation" + (let [v #'busy-wait + conf {:stages [] + :terminator :elapsed-time + :sample-count 1}] + (vreset! seen 0) + (let [original @v] + (instrument/instrument! v conf) + (instrument/uninstrument! v) + (is (= original @v) "original function restored") + (is (not (#'instrument/original-f (meta v))) + "tracking metadata removed")))) + + (testing "uninstrumentation idempotency" + (let [v #'busy-wait + original @v] + (instrument/uninstrument! v) + (is (= original @v) "safe to call on non-instrumented var")))) + +(deftest basic-instrumentation-test + (vreset! seen 0) + (let [original @#'busy-wait + collector-config {:stages [:compilation + :garbage-collector] + :terminator :elapsed-time} + start (jvm/timestamp)] + (instrument/uninstrument! #'busy-wait) + (instrument/instrument! #'busy-wait collector-config) + (is (original-f (meta #'busy-wait)) "function wrapped") + (is (types/metrics-samples-map? 
(sampler/samples-map busy-wait)) + "sample atom added") + (is (not= original-f @#'busy-wait) "wrapper is installed") + (busy-wait 1) + (is (= 1 + (-> busy-wait + sampler/samples-map + :metric->values + (get [:elapsed-time]) + count)) + "one sample added") + (busy-wait 2) + (is (= 2 + (-> busy-wait + sampler/samples-map + :metric->values + (get [:elapsed-time]) + count)) + "two samples added") + (let [finish (jvm/timestamp) + elapsed (unchecked-subtract finish start) + sample-map (sampler/samples-map busy-wait)] + (is (= 2 @seen) "original function called twice") + + ;; (is result "result returned") + (is (types/metrics-samples-map? sample-map) "sample map returned") + (is (= 2 (count ((:metric->values sample-map) [:elapsed-time]))) + "samples returned") + (is (= (count ((:metric->values sample-map) [:elapsed-time])) + (:eval-count sample-map)) + "eval-count correct") + (is (= 1 (:batch-size sample-map)) "batch-size is correct") + (is (>= elapsed (reduce + ((:metric->values sample-map) [:elapsed-time]))) + "elapsed time is sane") + (let [data-map ((analyse/stats) {:samples sample-map}) + mean-time (-> data-map + :stats util/stats + :elapsed-time + :mean)] + (is (>= (/ elapsed 2) mean-time) "can be analysed"))) + (instrument/uninstrument! #'busy-wait) + (is (= original @#'busy-wait) "function restored") + (is (empty? (set/intersection + (set (keys (meta #'busy-wait))) + #{original-f})) + "metadata removed"))) diff --git a/bases/criterium/test/criterium/jvm/impl_test.clj b/bases/criterium/test/criterium/jvm/impl_test.clj new file mode 100644 index 0000000..fd0d707 --- /dev/null +++ b/bases/criterium/test/criterium/jvm/impl_test.clj @@ -0,0 +1,319 @@ +(ns criterium.jvm.impl-test + (:require + [clojure.test :refer [deftest is testing use-fixtures]] + [clojure.string :as str] + [criterium.jvm.impl :as impl])) + +;; Fixtures + +(def ^:private original-thread-state (atom nil)) + +(defn save-thread-monitoring-state [f] + (let [bean (.. java.lang.management.ManagementFactory getThreadMXBean)] + (reset! original-thread-state + {:contention-monitoring (.isThreadContentionMonitoringEnabled bean) + :cpu-timing (.isThreadCpuTimeEnabled bean)})) + (f) + (let [{:keys [contention-monitoring cpu-timing]} @original-thread-state] + (impl/set-thread-contention-monitoring-enabled contention-monitoring) + (impl/set-thread-cpu-time-enabled cpu-timing))) + +(use-fixtures :each save-thread-monitoring-state) + +;; Helper Functions + +(defn thread-allocated-bytes-overhead + ^long [] + (unchecked-subtract + (impl/thread-allocated-bytes (impl/current-thread-id)) + (impl/thread-allocated-bytes (impl/current-thread-id)))) + +;; ClassLoading Tests + +(deftest class-loading-test + (testing "class loader counts returns expected structure" + (let [counts (impl/class-loader-counts)] + (is (contains? counts :loaded-count)) + (is (contains? counts :unloaded-count)) + (is (number? (:loaded-count counts))) + (is (number? (:unloaded-count counts))))) + + (testing "class loader counts change calculation" + (let [first-sample {:loaded-count 100 :unloaded-count 10} + last-sample {:loaded-count 120 :unloaded-count 15} + change (impl/class-loader-counts-change first-sample last-sample)] + (is (= {:loaded-count 20 :unloaded-count 5} change))))) + +;; Memory Tests + +(deftest memory-test + (testing "memory sample contains expected structure" + (let [sample (impl/memory-sample)] + (is (some? (:heap sample))) + (is (some? 
(:non-heap sample))))) + + (testing "memory usage calculation" + (let [sample (impl/memory-sample) + usage (impl/memory sample)] + (is (contains? usage :heap)) + (is (contains? usage :non-heap)) + (is (contains? usage :total)) + (doseq [section [:heap :non-heap :total]] + (let [section-data (get usage section)] + (is (contains? section-data :committed)) + (is (contains? section-data :init)) + (is (contains? section-data :max)) + (is (contains? section-data :used)))))) + + (testing "finalization tracking" + (let [sample (impl/finalization-sample) + result (impl/finalization sample)] + (is (contains? result :pending)) + (is (number? (:pending result)))))) + +;; Thread Tests + +(deftest thread-monitoring-test + (testing "thread-allocated-bytes has no memory overhead" + (is (= 0 (thread-allocated-bytes-overhead)))) + + (testing "thread info structure" + (let [id (impl/current-thread-id) + sample (impl/thread-sample id)] + (is (contains? sample :thread-info)) + (is (contains? sample :cpu-time)) + (is (contains? sample :user-time)) + (is (contains? sample :allocated)) + + (let [info (impl/thread sample)] + (is (contains? (:thread-info info) :blocked-count)) + (is (contains? (:thread-info info) :blocked-time-ms)) + (is (contains? (:thread-info info) :waited-count)) + (is (contains? (:thread-info info) :waited-time-ms))))) + + (testing "thread monitoring can be enabled/disabled" + (impl/set-thread-contention-monitoring-enabled true) + (impl/set-thread-cpu-time-enabled true) + (let [id (impl/current-thread-id) + sample (impl/thread-sample id)] + (is (number? (:cpu-time sample))) + (is (number? (:user-time sample)))))) + +;; Compilation Tests + +(def ^:private compilation-changes-data + [{:name "basic increase" + :first {:time-ms 100} + :last {:time-ms 150} + :expected {:time-ms 50}} + {:name "no change" + :first {:time-ms 100} + :last {:time-ms 100} + :expected {:time-ms 0}} + {:name "decrease (unusual but possible)" + :first {:time-ms 150} + :last {:time-ms 100} + :expected {:time-ms -50}}]) + +(deftest compilation-test + (testing "JIT compiler name" + (let [name (impl/jit-name)] + (is (string? name)) + (is (not (str/blank? name))))) + + (testing "compilation time monitoring" + (let [sample (impl/compilation-sample)] + ;; Should either be -1 (unsupported) or a non-negative number + (is (or (= sample -1) + (and (number? sample) + (>= sample 0)))))) + + (testing "compilation sample wrapper" + (let [sample 100 + result (impl/compilation sample)] + (is (= {:time-ms 100} result)))) + + (testing "compilation change calculations" + (doseq [{:keys [name first last expected]} compilation-changes-data] + (testing name + (is (= expected + (impl/compilation-change + (:time-ms first) + (:time-ms last)))))))) + +;; Memory Pool Tests + +(def ^:private memory-usage-keys + #{:committed :init :max :used}) + +(deftest memory-pool-test + (testing "memory pool names" + (let [names (impl/memory-pool-names)] + (is (vector? names)) + (is (pos? (count names))) + (is (every? string? names)) + (is (every? (complement str/blank?) names)))) + + (testing "memory pool keywords" + (let [keywords impl/memory-pool-keywords] + (is (vector? keywords)) + (is (= (count (impl/memory-pool-names)) (count keywords))) + (is (every? keyword? keywords)))) + + (testing "memory pools sample structure" + (let [sample (impl/memory-pools-sample)] + (is (vector? sample)) + (is (pos? (count sample))) + (is (every? #(instance? 
java.lang.management.MemoryUsage %) sample)))) + + (testing "memory pools usage calculation" + (let [sample (impl/memory-pools-sample) + pools (impl/memory-pools sample)] + ;; Check total is present + (is (contains? pools :total)) + + ;; All values should be maps with standard memory usage keys + (doseq [[_pool-name usage] pools] + (is (map? usage)) + (is (= memory-usage-keys (set (keys usage)))) + (doseq [v (vals usage)] + (is (number? v))))) + + ;; Verify total is sum of other pools + (let [sample (impl/memory-pools-sample) + pools (impl/memory-pools sample) + pool-vals (dissoc pools :total) + computed-total (reduce #'impl/val-sum (vals pool-vals))] + (is (= computed-total (:total pools))))) + + (testing "memory pools change calculation" + (let [sample (impl/memory-pools-sample) + ;; Create artificial change by doubling values + changed-sample (mapv #(let [usage ^java.lang.management.MemoryUsage %] + (java.lang.management.MemoryUsage. + (* 2 (.getInit usage)) + (* 2 (.getUsed usage)) + (* 2 (.getCommitted usage)) + (if (pos? (.getMax usage)) + (* 2 (.getMax usage)) + (.getMax usage)))) + sample) + change (impl/memory-pools-change sample changed-sample)] + ;; Structure tests + (is (contains? change :total)) + (is (= (count impl/memory-pool-keywords) + (count (dissoc change :total))))))) + +;; Garbage Collector Tests + +(def ^:private gc-sample-structure-keys + #{:count :time-ms}) + +(def ^:private gc-change-test-data + [{:name "Basic collection occurred" + :first {:count 10 :time-ms 100} + :last {:count 11 :time-ms 150} + :expected {:count 1 :time-ms 50}} + {:name "Multiple collections" + :first {:count 10 :time-ms 100} + :last {:count 15 :time-ms 200} + :expected {:count 5 :time-ms 100}} + {:name "No collections occurred" + :first {:count 10 :time-ms 100} + :last {:count 10 :time-ms 100} + :expected {:count 0 :time-ms 0}}]) + +(deftest garbage-collector-test + (testing "GC bean names" + (let [names (impl/garbage-collector-names)] + (is (vector? names)) + (is (pos? (count names))) + (is (every? string? names)) + (is (every? (complement str/blank?) names)))) + + (testing "GC keywords generation" + (let [keywords impl/garbage-collector-keywords] + (is (vector? keywords)) + (is (= (count (impl/garbage-collector-names)) (count keywords))) + (is (every? keyword? keywords)))) + + (testing "GC sample structure" + (let [sample (impl/garbage-collector-sample)] + (is (vector? sample)) + (is (pos? (count sample))) + (doseq [collector sample] + (is (map? collector)) + (is (= gc-sample-structure-keys + (set (keys collector)))) + (is (nat-int? (:count collector))) + (is (nat-int? (:time-ms collector)))))) + + (testing "GC metrics aggregation" + (let [sample (impl/garbage-collector-sample) + metrics (impl/garbage-collector sample)] + ;; Check structure + (is (contains? metrics :total)) + (is (= (count impl/garbage-collector-keywords) + (count (dissoc metrics :total)))) + + ;; Verify all collectors present + (doseq [k impl/garbage-collector-keywords] + (is (contains? metrics k))) + + ;; Check data types + (doseq [[_name data] metrics] + (is (map? data)) + (is (= gc-sample-structure-keys (set (keys data)))) + (is (nat-int? (:count data))) + (is (nat-int? 
(:time-ms data)))) + + ;; Verify total is sum of all collectors + (let [collectors-data (vals (dissoc metrics :total)) + computed-total (reduce #'impl/val-sum collectors-data)] + (is (= computed-total (:total metrics)))))) + + (testing "GC change calculations" + (doseq [{:keys [name first last expected]} gc-change-test-data] + (testing name + (is (= expected + (#'impl/val-diff last first))))) + + (testing "changes with actual GC data" + (let [first-sample (impl/garbage-collector-sample) + ;; Force some GC activity + _ (System/gc) + last-sample (impl/garbage-collector-sample) + changes (impl/garbage-collector-change first-sample last-sample)] + ;; Structure tests + (is (contains? changes :total)) + (is (= (count impl/garbage-collector-keywords) + (count (dissoc changes :total)))) + + ;; Data validation + (doseq [[_name change-data] changes] + (is (map? change-data)) + (is (= gc-sample-structure-keys + (set (keys change-data)))) + ;; Changes can be negative if GC runs between samples + (is (integer? (:count change-data))) + (is (integer? (:time-ms change-data)))))))) + +;; Runtime Info Tests + +(deftest runtime-info-test + (testing "runtime details contains expected fields" + (let [details (impl/runtime-details)] + (is (contains? details :name)) + (is (contains? details :spec-name)) + (is (contains? details :spec-vendor)) + (is (contains? details :vm-name)) + (is (contains? details :vm-vendor)) + (is (contains? details :vm-version)) + (is (contains? details :java-version)) + (is (contains? details :clojure-version)))) + + (testing "system properties are accessible" + (let [props (impl/system-properties)] + (is (instance? java.util.HashMap props) (str (type props))) + (is (contains? props "java.version")) + (is (string? (get props "java.version")))))) diff --git a/bases/criterium/test/criterium/jvm_test.clj b/bases/criterium/test/criterium/jvm_test.clj new file mode 100644 index 0000000..2b1c2c5 --- /dev/null +++ b/bases/criterium/test/criterium/jvm_test.clj @@ -0,0 +1,292 @@ +(ns criterium.jvm-test + "Tests for criterium.jvm - JVM monitoring and management interface. + + Tests cover four main categories: + 1. Time Management - timestamp, elapsed time, and wait functionality + 2. Memory Management - GC control and memory usage monitoring + 3. Thread Management - thread identification and resource tracking + 4. JMX Bean Access - JVM runtime metrics and statistics + + Testing approach: + - Zero garbage validation for sampling functions + - Thread safety verification where applicable + - Return type and content validation + - Edge case handling for critical functions + + Key test principles: + - Use cleanup fixtures to ensure consistent test environment + - Separate unit tests from allocation tracking tests + - Verify both functionality and performance characteristics + - Document expected behavior and edge cases" + (:require + [clojure.test :refer [deftest is testing use-fixtures]] + [criterium.agent :as agent] + [criterium.jvm :as jvm])) + +;; Test fixture for cleanup +(defn cleanup-fixture [f] + (try + (f) + (finally + (jvm/run-finalization-and-force-gc!)))) + +(use-fixtures :each cleanup-fixture) + +(deftest elapsed-timestamp-test + (let [ts (jvm/timestamp)] + (is (zero? 
(jvm/elapsed-time ts ts))) + (is (= 1 (jvm/elapsed-time ts (inc ts)))) + (is (= -1 (jvm/elapsed-time (inc ts) ts))))) + +;;; Time Management Tests + +(deftest wait-test + (testing "wait delays for specified duration" + (let [start (jvm/timestamp) + _ (jvm/wait 1000000) ; 1ms + end (jvm/timestamp) + elapsed (jvm/elapsed-time start end)] + (is (>= elapsed 1000000) + "Should wait at least the specified time") + (is (< elapsed 10000000) + "Should not wait significantly longer than specified"))) + (testing "wait returns nil" + (is (nil? (jvm/wait 1000)) + "Should return nil to avoid allocations"))) + +;;; Memory Management Tests + +(deftest gc-control-test + (testing "GC control functions execute without errors" + (is (nil? (jvm/force-gc!)) "force-gc! should return nil") + (is (nil? (jvm/run-finalization!)) "run-finalization! should return nil") + (is (nil? (jvm/run-finalization-and-force-gc!)) + "run-finalization-and-force-gc! should return nil"))) + +;;; JMX Bean Tests + +(deftest jmx-bean-tests + (testing "JIT compilation info" + (let [compile-time (jvm/compilation-sample)] + (is (instance? Long compile-time)) + (when (pos? compile-time) + (let [info (jvm/compilation compile-time)] + (is (map? info) + "Should return compilation info map") + (is (:time-ms info) + "Should contain compilation time"))))) + + (testing "Memory pool operations" + (let [pools (jvm/memory-pools)] + (is (map? pools) "Should return memory pools map") + (is (every? (fn [[k v]] + (and (keyword? k) + (= #{:committed :init :max :used} + (set (keys v))))) + pools) + "Each pool should have type and usage info"))) + + (testing "Garbage collector operations" + (let [gc-names (jvm/garbage-collector-names) + gc-info (jvm/garbage-collector)] + (is (seq gc-names) "Should return GC names") + (is (map? gc-info) "Should return GC info map") + (is (every? (fn [[k v]] + (and (keyword? k) + (number? (:count v)) + (number? (:time-ms v)))) + gc-info) + "Each GC should have name, count and time"))) + + (testing "Operating system info" + (let [os (jvm/os-details)] + (is (map? os) "Should return OS details map") + (is (every? os [:name :version :arch :available-processors]) + "Should contain all OS details"))) + + (testing "Runtime details" + (let [rt (jvm/runtime-details)] + (is (map? rt) "Should return runtime details map") + (is (every? rt [:name + :vm-name :vm-vendor :vm-version + :java-version :java-runtime-version + :input-arguments]) + "Should contain all runtime details")))) + +;;; Thread Management Tests + +(deftest thread-management-test + (testing "Current thread operations" + (let [thread-id (jvm/current-thread-id)] + (is (pos? thread-id) "Thread ID should be positive") + + (let [cpu-time (jvm/thread-cpu-time)] + (is (or (= -1 cpu-time) (not (neg? cpu-time))) + "CPU time should be -1 if unsupported or non-negative")) + + (let [allocated (jvm/thread-allocated-bytes)] + (is (or (= -1 allocated) (not (neg? allocated))) + "Allocated bytes should be -1 if unsupported or non-negative")) + + (let [sample (jvm/thread-sample)] + (is sample "Thread sample should not be nil") + (let [info (jvm/thread sample)] + (is (map? 
info) "Thread info should be a map"))))) + + (testing "Thread allocation change calculation" + (is (= 100 (jvm/thread-allocated-change 900 1000)) + "Should calculate positive change correctly") + (is (= -100 (jvm/thread-allocated-change 1000 900)) + "Should calculate negative change correctly"))) + +;;; Metric Collection Tests + +(deftest metric-collection-test + (testing "Single metric collection" + (doseq [metric-key [:class-loader-counts :compilation + :garbage-collector :memory :memory-pools + :thread]] + (testing (str "Collecting " metric-key) + (let [metric (jvm/collect-metric metric-key)] + (is (map? metric) + (str metric-key " should return a map"))))))) + +;;; Garbege Free Tests + +(def warmup-count 10) + +(deftest zero-garbage-test + (testing "Sampling is zero garbage" + (testing "for class-loader-counts" + ;; run once for function initialisation + (dotimes [_ warmup-count] + (agent/with-allocation-tracing (jvm/class-loader-counts))) + (let [[allocations _res] (agent/with-allocation-tracing + (jvm/class-loader-counts)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (zero? freed-bytes)))) + (testing "for compilation" + (dotimes [_ warmup-count] + (agent/with-allocation-tracing (jvm/compilation-sample))) + (let [[allocations _res] (agent/with-allocation-tracing + (jvm/compilation-sample)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))))) + (testing "for garbage-collectpr" + (dotimes [_ warmup-count] + (agent/with-allocation-tracing (jvm/garbage-collector-sample))) + (let [[allocations _res] (agent/with-allocation-tracing + (jvm/garbage-collector-sample)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))))) + (testing "for memory" + (dotimes [_ warmup-count] + (agent/with-allocation-tracing (jvm/memory-sample))) + (let [[allocations _res] (agent/with-allocation-tracing + (jvm/memory-sample)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))))) + (testing "for memory-pools" + (dotimes [_ warmup-count] + (agent/with-allocation-tracing (jvm/memory-pools-sample))) + (let [[allocations _res] (agent/with-allocation-tracing + (jvm/memory-pools-sample)) + {:keys [freed-bytes]} (->> allocations + (filterv (agent/allocation-on-thread?)) + agent/allocations-summary)] + (is (zero? freed-bytes) + (->> allocations + (filterv (agent/allocation-on-thread?)) + (filterv agent/allocation-freed?))))) + + ;; unfortunately this can not be made garbage free + (comment + (testing "for thread" + (let [[allocations _res] (agent/with-allocation-tracing + (jvm/thread-sample)) + {:keys [freed-bytes]} (agent/allocations-summary + (filterv (agent/allocation-on-thread?) allocations))] + (is (zero? freed-bytes) + (mapv agent/allocation-freed? + (filterv (agent/allocation-on-thread?) allocations)))))))) + +(comment + (deftest transient-persistent-overhead-test + (is (= 48 + (let [x (transient {})] + (jvm/allocated-bytes (persistent! 
x))))))) + +;; (deftest allocation-test +;; (testing "compilation-sample does not create garbage" +;; (let [fc (jvm/compilation-sample) +;; _ (jvm/compilation-diff! fc) +;; fc (jvm/compilation!) +;; x (jvm/current-thread-allocated-bytes) +;; fcd (jvm/compilation-diff! fc) +;; d (- (jvm/current-thread-allocated-bytes) x)] +;; (is (= 0 d)) +;; (is fcd "keep a refernce until after diff test"))) +;; (testing "finalization! and finalization-diff! do not create garbage" +;; (let [fc (jvm/finalization!) +;; _ (jvm/finalization-diff! fc) +;; fc (jvm/finalization!) +;; x (jvm/current-thread-allocated-bytes) +;; fcd (jvm/finalization-diff! fc) +;; d (- (jvm/current-thread-allocated-bytes) x)] +;; (is (= 0 d)) +;; (is fcd "keep a refernce until after diff test"))) +;; (testing "garbage-collector-raw functions do not create garbage" +;; (let [fc (jvm/garbage-collector-raw) +;; x (jvm/current-thread-allocated-bytes) +;; _ (agent/with-allocation-tracing +;; (jvm/garbage-collector-diff-raw fc)) +;; d (- (jvm/current-thread-allocated-bytes) x) +;; res (jvm/garbage-collector-map-raw fc)] +;; (is (map? res)) +;; (is (= 0 d) "no garbage object genetation"))) +;; ;; (testing "garbage-collector! and garbage-collector-diff! do not create garbage" +;; ;; (agent/with-allocation-tracing +;; ;; (let [fc (jvm/garbage-collector!) +;; ;; _ (jvm/garbage-collector-diff! fc) +;; ;; fc (jvm/garbage-collector!) +;; ;; x (jvm/current-thread-allocated-bytes) +;; ;; fcd (jvm/garbage-collector-diff! fc) +;; ;; d (- (jvm/current-thread-allocated-bytes) x)] +;; ;; (is (= 0 d)) +;; ;; (is fcd "keep a refernce until after diff test")))) +;; (testing "memory! and memory-diff! do not create garbage" +;; (let [fc (jvm/memory!) +;; x (jvm/current-thread-allocated-bytes) +;; _ (agent/with-allocation-tracing +;; (jvm/memory-diff! fc)) +;; d (- (jvm/current-thread-allocated-bytes) x) +;; res (jvm/memory-diff-map fc)] +;; (is (map? res)) +;; (is (= 0 d)))) +;; ;; (testing "memory! and memory-diff! do not create garbage" +;; ;; (let [fc (jvm/memory!) +;; ;; _ (jvm/memory-diff! fc) +;; ;; fc (jvm/memory!) +;; ;; x (jvm/current-thread-allocated-bytes) +;; ;; fcd (jvm/memory-diff! fc) +;; ;; d (- (jvm/current-thread-allocated-bytes) x)] +;; ;; (is (= 0 d)) +;; ;; (is fcd "keep a refernce until after diff test"))) +;; ) diff --git a/bases/criterium/test/criterium/measured_test.clj b/bases/criterium/test/criterium/measured_test.clj new file mode 100644 index 0000000..0b03a75 --- /dev/null +++ b/bases/criterium/test/criterium/measured_test.clj @@ -0,0 +1,97 @@ +(ns criterium.measured-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.agent :as agent] + [criterium.jvm :as jvm] + [criterium.measured :as measured])) + +(defn- inc-long [x] (inc (long x))) + +(defn invoke + "Invoke the given Measured. + + Calls the Measured's function with the result of calling the + Measured's state function." + ([measured] + (invoke measured 1)) + ([measured eval-count] + (measured/invoke + measured + (measured/args measured) + eval-count))) + +(deftest measured-test + (let [eval-count (volatile! 0) + m (measured/measured + (fn [] :arg) + (fn [arg ^long n] + (vswap! eval-count #(+ n ^long %)) + [1 [arg arg]]) + (fn [] ::symbolic))] + (is (measured/measured? m)) + (is (= ::symbolic (measured/symbolic m))) + (testing "invoke calls the function with one eval" + (vreset! eval-count 0) + (is (= [1 [:arg :arg]] (invoke m))) + (is (= 1 @eval-count))) + (testing "invoke with eval-count calls the function with the eval count." + (vreset! 
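+    ;; Sketch of the shape being tested, using only the arities exercised
+    ;; in this file (state fn producing the argument, measure fn returning
+    ;; an [elapsed result] pair for n evaluations):
+    ;;
+    ;;   (let [m (measured/measured
+    ;;            (fn [] 41)                        ; state fn -> argument
+    ;;            (fn [x ^long n] [n (inc x)]))]    ; -> [elapsed result]
+    ;;     (measured/invoke m (measured/args m) 1)) ; => [1 42]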
eval-count 0) + (is (= [1 [:arg :arg]] (invoke m 3))) + (is (= 3 @eval-count))))) + +(deftest expr-test + (testing "nil expr" + (let [nil-m (measured/expr nil)] + (is (nil? (second (invoke nil-m)))))) + (testing "const expr" + (let [const-m (measured/expr ::const)] + (is (= ::const (second (invoke const-m)))))) + (testing "function call" + (let [fncall-m (measured/expr (identity ::value))] + (is (= ::value (second (invoke fncall-m)))))) + (testing "recursive function call" + (let [call-count (volatile! 0) + f (fn [v] (vswap! call-count inc-long) v) + recursive-fncall-m (measured/expr (f (f ::value)))] + (is (= ::value (second (invoke recursive-fncall-m)))) + (is (= 2 @call-count)))) + (testing "const expression is lifted" + (let [const-expr-m (measured/expr (identity (+ 1 2)))] + (is (= 3 (second (invoke const-expr-m)))) + (is (= [3] ((:args-fn const-expr-m)))))) + (testing "args are type hinted" + ;; if this gives a reflection warning then it should be treated as + ;; an error. + (let [vec-nth-m (measured/expr (.nth [0 1 3] 1))] + (is (= 1 (second (invoke vec-nth-m)))))) + (testing "accepts time-fn option" + (let [invokes (volatile! 0) + f (fn ^long [] + (vswap! invokes inc-long) + (jvm/thread-cpu-time)) + m (measured/expr 1 {:time-fn f})] + (is (= 1 (second (invoke m)))) + (is (= 2 @invokes)))) + (testing "with transduce" + (let [m (measured/expr + (transduce (comp (filter odd?) (map inc)) + (range 5)))] + (is (= 6 (second (invoke m))))))) + +(deftest zero-garbage-test + (testing "return value is zero garbage" + (let [measured (measured/measured + (fn [] nil) + (fn [_ _] [1 2])) + _ (dotimes [_ 1000] + (measured/invoke measured nil 1)) + [allocations ret] (agent/with-allocation-tracing + (measured/invoke measured nil 1)) + thread-allocations (->> allocations + (filterv (agent/allocation-on-thread?))) + {:keys [freed-bytes]} (-> thread-allocations + agent/allocations-summary)] + (is (zero? freed-bytes) thread-allocations) + (when-not (zero? 
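+          ;; The allocation-check pattern used here and in the jvm tests, as
+          ;; a hedged sketch (assuming the agent API behaves as in these tests;
+          ;; f stands for whatever zero-argument function is being checked):
+          ;;
+          ;;   (dotimes [_ 1000] (f))                   ; warm up / JIT first
+          ;;   (let [[allocs _] (agent/with-allocation-tracing (f))]
+          ;;     (->> allocs
+          ;;          (filterv (agent/allocation-on-thread?))
+          ;;          agent/allocations-summary
+          ;;          :freed-bytes
+          ;;          zero?))                            ; true when no garbage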
(long freed-bytes)) + (tap> {:zero-garbage-test + {:allocations (frequencies thread-allocations)}})) + (is (= [1 2] ret) "hold reference to return value until end of test")))) diff --git a/bases/criterium/test/criterium/metric_test.clj b/bases/criterium/test/criterium/metric_test.clj new file mode 100644 index 0000000..5d12355 --- /dev/null +++ b/bases/criterium/test/criterium/metric_test.clj @@ -0,0 +1,127 @@ +(ns criterium.metric-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.collector.metrics :as metrics] + [criterium.metric :as metric])) + +(deftest filter-metric-values-test + (testing "filter-metric-values" + (let [values [{:dimension :time :value 1} + {:dimension :memory :value 2} + {:dimension :time :value 3}]] + (testing "filters by predicate" + (is (= [{:dimension :time :value 1} + {:dimension :time :value 3}] + (metric/filter-metric-values + #(= :time (:dimension %)) + values)))) + + (testing "returns empty vector when no matches" + (is (= [] + (metric/filter-metric-values + #(= :not-found (:dimension %)) + values)))) + + (testing "returns empty vector for empty input" + (is (= [] + (metric/filter-metric-values + #(= :time (:dimension %)) + []))))))) + +(deftest filter-metrics-test + (testing "filter-metrics" + (let [metric-map {:top-level + {:type :quantitative + :values [{:dimension :time :value 1} + {:dimension :memory :value 2}] + :groups {"group1" + {:values [{:dimension :time :value 3} + {:dimension :count :value 4}]} + "group2" + {:values [{:dimension :memory :value 5}]}}}}] + + (testing "filters values at top level" + (let [result (metric/filter-metrics + metric-map + #(= :time (:dimension %)))] + (is (= [{:dimension :time :value 1}] + (:values (:top-level result)))) + (is (contains? (:top-level result) :type)))) + + (testing "filters nested groups" + (let [result (metric/filter-metrics + metric-map + #(= :time (:dimension %)))] + (is (= {"group1" + {:values [{:dimension :time :value 3}]}} + (:groups (:top-level result)))))) + + (testing "removes empty groups" + (let [result (metric/filter-metrics + metric-map + #(= :count (:dimension %)))] + (is (= {"group1" + {:values [{:dimension :count :value 4}]}} + (:groups (:top-level result)))) + (is (empty? (:values (:top-level result))))))))) + +(deftest dimension-pred-test + (testing "dimension-pred" + (let [pred (metric/dimension-pred :time)] + (testing "matches correct dimension" + (is (pred {:dimension :time :value 1}))) + + (testing "does not match different dimension" + (is (not (pred {:dimension :memory :value 1})))) + + (testing "does not match missing dimension" + (is (not (pred {:value 1}))))))) + +(deftest type-pred-test + (testing "type-pred" + (let [pred (metric/type-pred :event)] + (testing "matches correct type" + (is (pred {:type :event :value 1}))) + + (testing "does not match different type" + (is (not (pred {:type :quantitative :value 1})))) + + (testing "does not match missing type" + (is (not (pred {:value 1}))))))) + +(deftest gc-integration-test + (testing "garbage collector metric filtering" + (let [metrics (metrics/metrics)] + (testing "filters time dimension across groups" + (let [result (metric/filter-metrics + metrics + (metric/dimension-pred :time))] + (is (every? 
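+        ;; Usage sketch for the filtering helpers (same argument order as in
+        ;; the tests above):
+        ;;
+        ;;   (-> (metrics/metrics)
+        ;;       (metric/filter-metrics (metric/dimension-pred :time)))
+        ;;
+        ;; keeps only metric values whose :dimension is :time and prunes any
+        ;; groups left without values.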
(comp #{:time} :dimension) + (mapcat + (comp :values second) + (:groups (:garbage-collector result))))) + (is (= (count (:groups (:garbage-collector metrics))) + (count (:groups (:garbage-collector result))))))) + + (testing "filters count dimension across groups" + (let [result (metric/filter-metrics + metrics + (metric/dimension-pred :count))] + (is (every? (comp #{:count} :dimension) + (mapcat + (comp :values second) + (:groups result)))) + (is (= (count (:groups metrics)) + (count (:groups result)))))) + + (testing "filters by event type" + (let [result (metric/filter-metrics + metrics + (metric/type-pred :event))] + (is (= (:garbage-collector metrics) (:garbage-collector result))))) + + (testing "removes all groups for non-matching type" + (let [result (metric/filter-metrics + metrics + (metric/type-pred :quantitative))] + (is (empty? (:groups result)))))))) diff --git a/bases/criterium/test/criterium/platform_test.clj b/bases/criterium/test/criterium/platform_test.clj new file mode 100644 index 0000000..d162eb7 --- /dev/null +++ b/bases/criterium/test/criterium/platform_test.clj @@ -0,0 +1,16 @@ +(ns criterium.platform-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.platform :as platform])) + +(deftest platform-point-estimates-test + (testing "platform-point-estimates" + (let [res (platform/platform-point-estimates {:limit-time-s 0.001})] + (is (= [:latency + :granularity + :constant-long + :constant-double + :constant-object + :constant-nil] + (keys res))) + (is (every? double? (vals res)))))) diff --git a/bases/criterium/test/criterium/probability_test b/bases/criterium/test/criterium/probability_test new file mode 100644 index 0000000..fecfe33 --- /dev/null +++ b/bases/criterium/test/criterium/probability_test @@ -0,0 +1,36 @@ +(ns criterium.probability-test) + +;; Values from R, qnorm (with options(digits=15)) +(deftest normal-quantile-test + (is (pos? (probability/normal-quantile 0.5001))) + (is (neg? 
(stats/normal-quantile 0.4999))) + (is (< 2e-8 (- (stats/normal-quantile 0.999) (stats/normal-quantile 0.001)))) + (let [max-error 1.0e-7] + (is (= 0.0 (stats/normal-quantile 0.5))) + (is (test-max-error 1.2815515655446 (stats/normal-quantile 0.9) max-error)) + (is (test-max-error 0.674489750196082 (stats/normal-quantile 0.75) max-error)) + (is (test-max-error -1.03643338949379 (stats/normal-quantile 0.15) max-error)) + (is (test-max-error -2.32634787404084 (stats/normal-quantile 0.01) max-error)))) + + +;; Values from R, erf <- function(x) 2 * pnorm(x * sqrt(2)) - 1 + + +(deftest erf-test + (let [max-error 1.5e-7] + (test-max-error 0.999999984582742 (stats/erf 4) max-error) + (test-max-error 0.995322265018953 (stats/erf 2) max-error) + (test-max-error 0.842700792949715 (stats/erf 1) max-error) + (test-max-error 0.112462916018285 (stats/erf 0.1) max-error) + (test-max-error 0.0112834155558497 (stats/erf 0.01) max-error))) + +;; Values from R, pnorm +(deftest normal-cdf-test + (let [max-error 1.5e-7] + (test-max-error 0.99865010196837 (stats/normal-cdf 3.0) max-error) + (test-max-error 0.977249868051821 (stats/normal-cdf 2.0) max-error) + (test-max-error 0.841344746068543 (stats/normal-cdf 1.0) max-error) + (test-max-error 0.691462461274013 (stats/normal-cdf 0.5) max-error) + (test-max-error 0.5 (stats/normal-cdf 0.0) max-error) + (test-max-error 0.158655253931457 (stats/normal-cdf -1.0) max-error) + (test-max-error 0.00134989803163009 (stats/normal-cdf -3.0) max-error))) diff --git a/bases/criterium/test/criterium/sampled_fn_test.clj b/bases/criterium/test/criterium/sampled_fn_test.clj new file mode 100644 index 0000000..c54f526 --- /dev/null +++ b/bases/criterium/test/criterium/sampled_fn_test.clj @@ -0,0 +1,24 @@ +(ns criterium.sampled-fn-test + (:require + [clojure.test :refer [deftest is]] + [criterium.collector-configs :as collector-configs] + [criterium.jvm :as jvm] + [criterium.sampled-fn :as sampled-fn] + [criterium.sampler :as sampler] + [criterium.util.t-digest :as t-digest])) + +(defn- ten-micros + [] + (jvm/wait 10000)) + +(deftest sampled-fn-test + (with-redefs + [ten-micros (sampled-fn/sample-fn + ten-micros + collector-configs/default-collector-config)] + (dotimes [_ 1000] + (ten-micros)) + + (let [samples (sampler/samples-map ten-micros) + digest (get-in samples [:metric->digest :elapsed-time])] + (is (< 10000 (t-digest/quantile digest 0.5) 10500))))) diff --git a/bases/criterium/test/criterium/test_data.clj b/bases/criterium/test/criterium/test_data.clj new file mode 100644 index 0000000..c830229 --- /dev/null +++ b/bases/criterium/test/criterium/test_data.clj @@ -0,0 +1,173 @@ +(ns criterium.test-data + (:require + [criterium.analyse :as analyse] + [criterium.collect-plan :as collect-plan] + [criterium.collector.metrics :as metrics] + [criterium.metric :as metric])) + +(defn bench-stats-map [] + {:data + {:samples + {:type :criterium/collected-metrics-samples + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :metric->values {} + :transform collect-plan/identity-transforms} + :stats + {:type :criterium/stats + :stats {:elapsed-time + {:mean 100.0 + :variance 16.0 + :mean-plus-3sigma 112.0 + :mean-minus-3sigma 88.0 + :min-val 89.0 + :max-val 114.0}} + :transform collect-plan/identity-transforms + :batch-size 1 + :source-id :samples + :outliers-id nil + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time])}}}) + +(defn samples-with-2-values-map [] + {:metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :data + {:samples + {:type 
:criterium/collected-metrics-samples + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :metric->values {[:elapsed-time] [1 1]} + :transform collect-plan/identity-transforms}} + :batch-size 1 + :eval-count 2 + :num-samples 2 + :elapsed-time 1}) + +(defn samples-with-transformed-values-map [] + {:metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :data + {:samples + {:type :criterium/collected-metrics-samples + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :metric->values {[:elapsed-time] [2 4 8]} + :transform (#'collect-plan/batch-transforms 2)}} + :batch-size 3 + :eval-count 6 + :num-samples 3 + :elapsed-time 14}) + +(defn samples-with-variance-12-map [] + {:metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :data + {:samples + {:type :criterium/collected-metrics-samples + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :metric->values {[:elapsed-time] [1 1 1 5 5 5 9 9 9]} + :transform collect-plan/identity-transforms}} + :batch-size 1 + :eval-count 9 + :num-samples 9 + :elapsed-time 42}) + +(defn samples-with-outliers-values-map [] + {:metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :data + {:samples + {:type :criterium/collected-metrics-samples + :metrics-defs (select-keys (metrics/metrics) [:elapsed-time]) + :metric->values {[:elapsed-time] [9 10 9 10 9 10 10000]} + :transform collect-plan/identity-transforms + :batch-size 1 + :num-samples 7 + :eval-count 1 + :elapsed-time 1}}}) + +(defn outlier-count-map [] + {:data + {:outliers + {:type :criterium/outliers + :metrics-defs (select-keys (metrics/metrics) [:elapsed-time]) + :outliers {:elapsed-time + {:outlier-counts + (analyse/outlier-count 0 2 3 0)}} + :num-samples 1 + :source-id :samples + :quantiles-id :quantiles + :transform collect-plan/identity-transforms}} }) + +(defn outlier-significance-map [] + {:data + {:outlier-significance + {:type :criterium/outlier-significance + :outlier-significance {:elapsed-time + {:effect :moderate + :significance 0.25}} + :metrics-defs (-> (metrics/metrics) + (metric/select-metrics + [:elapsed-time]) + (metric/filter-metrics + (metric/type-pred + :quantitative))) + :num-samples 1 + :source-id :samples + :outliers-id :outliers + :transform collect-plan/identity-transforms}}}) + +(defn samples-for-event-stats-map + [] + (let [metrics-defs + (-> + (select-keys + (metrics/metrics) + [:class-loader :compilation]) + (assoc-in + [:garbage-collector] + {:type :event + :groups + {:total + {:summary + (str "%s: ran %s times" + " for a total of %s in %s samples") + :values + [{:path [:garbage-collector :total :count] + :scale 1 + :type :event + :label "GC total count" + :dimension :count} + {:path [:garbage-collector :total :time-ms] + :scale 1e-3 + :type :event + :label "GC total time" + :dimension :time}] + :label "Garbage Collector"}}}))] + {:data + {:samples + {:type :criterium/collected-metrics-samples + :metrics-defs metrics-defs + :metric->values {[:elapsed-time] [1] + [:compilation :time-ms] [3] + [:garbage-collector :total :time-ms] [1] + [:garbage-collector :total :count] [2] + [:class-loader :loaded-count] [1] + [:class-loader :unloaded-count] [1]} + :transform collect-plan/identity-transforms + :elapsed-time 0 + :num-samples 1 + :batch-size 1 + :eval-count 1 + :expr-value 1}}})) diff --git a/bases/criterium/test/criterium/test_utils.clj b/bases/criterium/test/criterium/test_utils.clj new file mode 100644 index 0000000..b8a5aa7 --- /dev/null +++ 
b/bases/criterium/test/criterium/test_utils.clj @@ -0,0 +1,180 @@ +(ns criterium.test-utils + (:require + [clojure.string :as str] + [clojure.test :refer [deftest is testing]] + [clojure.test.check.generators :as gen])) + +(defn abs-error + ^double [^double expected ^double actual] + (Math/abs (- expected actual))) + +(defn rel-error + ^double [^double expected ^double actual] + (let [e (abs-error expected actual)] + (if (zero? expected) + actual + (/ e (Math/abs expected))))) + +(def ^:private ^:const default-ulps 2) +(def ^:private ^:const default-rel-tolerance 1e-8) + +(defn ulp ^double [x] + (Math/ulp (double x))) + +(defn compare-doubles + "Compare expected and actual doubles. + + Uses both ULP and relative difference. Numbers are considered equal + if either criterion matches. Relative difference is calculated + relative to expected value. + + Parameters: + expected - Expected value + actual - Actual value to compare against expected + ulps - Max units in last place difference (default: 2) + rel-tolerance - Maximum relative difference (default: 1e-8)" + ([expected actual] + (compare-doubles expected actual default-rel-tolerance default-ulps)) + ([^double expected ^double actual ^double rel-tolerance ^long ulps] + (let [expected (double expected) + actual (double actual) + abs-expected (Math/abs expected) + diff (- actual expected) + abs-diff (Math/abs diff) + rel-diff (if (zero? expected) + diff + (/ diff abs-expected)) + abs-rel-diff (Math/abs rel-diff) + ulp-tolerance (* ulps (Math/ulp abs-expected))] + (when-not + (or (<= abs-diff ulp-tolerance) + (<= abs-rel-diff rel-tolerance)) + {:diff diff + :abs-diff abs-diff + :abs-rel-diff abs-rel-diff + :ulp-limit ulp-tolerance + :rel-tolerance rel-tolerance})))) + +(defn element-diffs [expected actual rel-tolerance ulps] + (let [element-diffs# + (mapv #(compare-doubles + %1 + %2 + (or rel-tolerance default-rel-tolerance) + (or ulps default-ulps)) + expected + actual)] + (->> element-diffs# + (map-indexed + (fn [i diff] + (when diff (assoc diff :i i)))) + (filterv some?) + not-empty))) + +(defmethod clojure.test/assert-expr 'approx= + [msg [_ expected actual & [rel-tolerance ulps]]] + `(let [diff# (if (sequential? ~expected) + (cond + (not (sequential? ~actual)) + {:type (type ~actual)} + (not= (count ~expected) (count ~actual)) + {:count-expected (count ~expected) + :count-actual (count ~actual)} + :else + (element-diffs ~expected ~actual ~rel-tolerance ~ulps)) + (compare-doubles + ~expected + ~actual + ~(if rel-tolerance rel-tolerance default-rel-tolerance) + ~(if ulps ulps default-ulps )))] + (clojure.test/do-report + {:type (if diff# :fail :pass) + :message ~msg + :expected ~expected + :actual ~actual + :diff diff#}))) + +(defn approx= [a b & [rel-tolerance ulps]] + (let [comp (compare-doubles + a + b + (or rel-tolerance default-rel-tolerance) + (or ulps default-ulps))] + (when comp + (prn :a a :b b comp)) + (nil? comp))) + +(defmacro test-max-error + ([expected actual max-error] + `(is (< (abs-error ~expected ~actual) ~max-error))) + ([expected actual max-error msg] + `(is (< (abs-error ~expected ~actual) ~max-error) ~msg))) + +(defn gen-bounded + "Generates a long in the range from min-val to max-val inclusive. + Unlike gen/choose, this is bounded by the generator's `size` parameter, + starting at min-val." 
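+  ;; Usage sketch: because the upper bound grows with test.check's `size`
+  ;; parameter, early samples cluster near min-val, e.g. (assuming the
+  ;; gen/sample defaults)
+  ;;
+  ;;   (gen/sample (gen-bounded 5 100))
+  ;;   ;; => small longs starting at 5, never above 100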
+ [^long min-val ^long max-val] + (let [r (- max-val min-val)] + (gen/fmap + (fn [^long x] (+ min-val x)) + (gen/sized + (fn [^long size] + (let [s (min size r)] + (gen/choose 0 s))))))) + +(defn gen-double [options] + (gen/double* + (merge {:infinite? false :NaN? false} options))) + +(defn trimmed-lines + [s] + (->> s + str/split-lines + (mapv str/trim))) + +(defn plus-frac ^double [^double x ^double f] + (+ x (* x f))) + +(deftest approximately=-test + (testing "exact equality" + (is (nil? (compare-doubles 1.0 1.0))) + (is (nil? (compare-doubles 0.0 0.0))) + (is (nil? (compare-doubles -1.0 -1.0)))) + + (testing "exact equality, integer args" + (is (nil? (compare-doubles 1 1))) + (is (nil? (compare-doubles 0 0))) + (is (nil? (compare-doubles -1 -1)))) + + (testing "ulp-based comparison" + (let [x (+ 0.1 0.2)] + (is (nil? (compare-doubles 0.3 x))))) + + (testing "ulp-based failure" + (let [x (+ 0.1 (* 3 (ulp 0.1)))] + (is (compare-doubles 0.3 x)))) + + (testing "relative tolerance" + (is (nil? (compare-doubles 100.0 100.1 0.01 2))) ; within 1% tolerance + (is (some? (compare-doubles 100.0 101.0 0.001 2)))) ; exceeds 0.1% tolerance + + (testing "near zero values" + (is (nil? (compare-doubles 0.0 1e-12))) ; within default tolerance + (is (some? (compare-doubles 0.0 1e-6)))) ; exceeds tolerance + + (testing "large numbers" + (is (nil? (compare-doubles 1e6 (plus-frac 1e6 1e-9)))) ; small rel diff + (is (some? (compare-doubles 1e6 (plus-frac 1e6 1e-7))))) ; large rel diff + + (testing "small numbers" + (is (nil? (compare-doubles 1e-6 (plus-frac 1e-6 1e-9)))) ; small rel diff + (is (some? (compare-doubles 1e-6 (plus-frac 1e-6 1e-7)))))) ; + +(comment + (deftest approx=-test + (is (approx= 1 2)) + (is (approx= 1 1)) + (is (approx= 1.0 (plus-frac 1.0 0.1))) + (is (approx= [1.0] [(plus-frac 1.0 0.1)])) + (is (approx= [1.0] [(plus-frac 1.0 1e-9)])))) diff --git a/bases/criterium/test/criterium/trigger_test.clj b/bases/criterium/test/criterium/trigger_test.clj new file mode 100644 index 0000000..c563b27 --- /dev/null +++ b/bases/criterium/test/criterium/trigger_test.clj @@ -0,0 +1,107 @@ +(ns criterium.trigger-test + "Tests for the criterium.trigger namespace. + + Verifies the core functionality of the trigger mechanism including: + - Trigger creation and state initialization + - Sample collection behavior + - Extra data attachment to samples + - Sample retrieval and reset behavior" + (:require + [clojure.test :refer [deftest is testing use-fixtures]] + [criterium.analyse :as analyse] + [criterium.sampler :as sampler] + [criterium.trigger :as trigger] + [criterium.util.helpers :as util])) + + +(def ^:private test-extra-data + "Sample extra data for testing" + {:test-key "test-value"}) + +(def ^:dynamic *trigger* + "Dynamic var for holding the current test trigger instance") + +(defn trigger-fixture + "Test fixture that provides a fresh trigger for each test" + [f] + (binding [*trigger* (trigger/trigger)] + (f))) + +(use-fixtures :each trigger-fixture) + +(deftest trigger-initialization-test + (testing "A new trigger is initialized" + (let [t (trigger/trigger)] + (testing "Initial trigger state" + (is (zero? (-> t :state deref :last-triggered)) + "Last triggered should start at 0") + (is (empty? (-> t :state deref :samples)) + "Samples should start empty"))))) + +(deftest trigger-firing-test + (testing "A trigger" + (testing "First fire only records timestamp" + (trigger/fire! *trigger*) + (is (pos? (-> *trigger* :state deref :last-triggered)) + "Should record first timestamp") + (is (empty? 
(-> *trigger* :state deref :samples)) + "Should not collect sample on first fire"))) + + (testing "Second fire creates first sample" + (trigger/fire! *trigger*) + (is (= 1 (count (-> *trigger* :state deref :samples))) + "Should collect first sample") + (let [sample (first (-> *trigger* :state deref :samples))] + (is (contains? sample :elapsed-time) "Sample should contain elapsed time") + (is (pos? (:elapsed-time sample)) "Elapsed time should be positive"))) + + (testing "Additional fires add more samples" + (trigger/fire! *trigger*) + (is (= 2 (count (-> *trigger* :state deref :samples))) + "Should accumulate samples"))) + +(deftest extra-data-test + (testing "Test attaching extra data to samples" + (testing "Samples include attached extra data" + (trigger/fire! *trigger*) + (trigger/fire! *trigger* test-extra-data) + (let [sample (first (-> *trigger* :state deref :samples))] + (is (= test-extra-data (select-keys sample (keys test-extra-data))) + "Extra data should be merged into sample"))))) + +(deftest sampler-protocol-test + (testing "trigger with extra data" + (testing "Sample map structure" + (trigger/fire! *trigger*) + (trigger/fire! *trigger*) + (let [samples-map (sampler/samples-map *trigger*)] + (is (= 1 (:batch-size samples-map)) + "Batch size should be 1 for triggers") + (is (= 1 (:eval-count samples-map)) + "Eval count should match sample count") + (is (map? (:metric->values samples-map)) + "Samples should be returned as a map") + (is (pos? + (first (get-in samples-map [:metric->values [:elapsed-time]]))) + "Samples should contain positive elapsed times")))) + + (testing "Trigger reset after sample retrieval" + (sampler/reset-samples! *trigger*) + (let [samples-map (sampler/samples-map *trigger*)] + (is (empty? (get-in samples-map [:metric->values [:elapsed-time]])) + "Samples should be cleared")))) + +(deftest benchmark-integration-test + (testing "A trigger sample map" + (testing "can be analyzed with benchmark functions" + (trigger/fire! *trigger*) + (trigger/fire! *trigger*) + (trigger/fire! *trigger*) + (let [samples-map (sampler/samples-map *trigger*) + analyzed ((analyse/stats {}) + {:samples samples-map})] + (is (-> analyzed :stats) "Should contain statistical analysis") + (is (pos? 
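+      ;; The end-to-end trigger workflow this test exercises, as a sketch
+      ;; (using only the calls appearing in this file):
+      ;;
+      ;;   (let [t (trigger/trigger)]
+      ;;     (trigger/fire! t)          ; first fire only records a timestamp
+      ;;     (trigger/fire! t)          ; each later fire adds a sample
+      ;;     ((analyse/stats {})
+      ;;      {:samples (sampler/samples-map t)}))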
(get-in + (-> analyzed :stats util/stats) + [:elapsed-time :mean])) + "Should calculate positive mean elapsed time"))))) diff --git a/bases/criterium/test/criterium/util/bootstrap_test.clj b/bases/criterium/test/criterium/util/bootstrap_test.clj new file mode 100644 index 0000000..8493880 --- /dev/null +++ b/bases/criterium/test/criterium/util/bootstrap_test.clj @@ -0,0 +1,189 @@ +(ns criterium.util.bootstrap-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.analyse-test :refer [metrics-samples]] + [criterium.test-utils :refer [test-max-error]] + [criterium.util.bootstrap :as bootstrap] + [criterium.util.invariant :refer [have]] + [criterium.util.sampled-stats-test :as sampled-stats-test] + [criterium.util.stats :as stats] + [criterium.util.well :as well] + [criterium.util.helpers :as util])) + +(deftest bootstrap-estimate-test + (is (= [1.0 0.0 [1.0 1.0]] + (bootstrap/bootstrap-estimate (take 20 (repeatedly (constantly 1)))))) + (is (= [2.0 0.0 [2.0 2.0]] + (bootstrap/bootstrap-estimate (take 20 (repeatedly (constantly 2)))))) + ;; (is (= [1/2 0.26315789473684204 [-0.5054587850434509 1.5054587850434509]] + ;; (bootstrap-estimate (take 20 (cycle [0 1]))))) + (let [[m s [l u]] (bootstrap/bootstrap-estimate + (take 1000000 (repeatedly rand)))] + (is (test-max-error 0.5 m 1e-2)) + (is (test-max-error 0.0 l 0.2)) + (is (test-max-error 1.0 u 0.2)) + (is (test-max-error 0.0833 s 0.2)))) + +(deftest bootstrap-estimate-scale-test + (is (= [1e-9 [1e-8 1e-8]] + (bootstrap/scale-bootstrap-estimate + (bootstrap/->BcaEstimate 1 [{:value 10} {:value 10}]) + 1e-9)))) + +(deftest bootstrap-test + (is (= [1.0 0.0 [1.0 1.0]] + (bootstrap/bootstrap (take 20 (repeatedly (constantly 1))) + stats/mean + 100 + well/well-rng-1024a))) + (is (= [[1.0 0.0 [1.0 1.0]] [0.0 0.0 [0.0 0.0]]] + (bootstrap/bootstrap (take 20 (repeatedly (constantly 1))) + (juxt stats/mean stats/variance) + 100 + well/well-rng-1024a)))) + +(deftest bootstrap-bca-test + (let [ci 0.95] + (is (= (bootstrap/map->BcaEstimate + {:point-estimate 1.0 + :estimate-quantiles [{:value 1.0 :alpha 0.95} + {:value 1.0 :alpha (- 1 0.95)}]}) + (bootstrap/bootstrap-bca (take 20 (repeatedly (constantly 1))) + stats/mean + 100 + [0.5 ci (- 1.0 ci)] + well/well-rng-1024a))) + (is (= [(bootstrap/map->BcaEstimate + {:point-estimate 1.0 + :estimate-quantiles [{:value 1.0 :alpha 0.95} + {:value 1.0 :alpha (- 1 0.95)}]}) + (bootstrap/map->BcaEstimate + {:point-estimate 0.0 + :estimate-quantiles [{:value 0.0 :alpha 0.95} + {:value 0.0 :alpha (- 1 0.95)}]})] + (bootstrap/bootstrap-bca (take 20 (repeatedly (constantly 1))) + (juxt stats/mean stats/variance) + 100 + [0.5 ci (- 1.0 ci)] + well/well-rng-1024a))))) + +#_(comment + (let [f (fn [n] (take n (repeatedly rand)))] + (dissoc (criterium.bench/measure (f 1000000)) :expr-value)) + + (let [f (fn [n] (take n (criterium.util.well/well-rng-1024a)))] + (dissoc (criterium.bench/measure (f 1000000)) :expr-value)) + + (criterium.bench/time (bootstrap-estimate (take 1000000 (repeatedly rand)))) + + (let [f (fn [n] (bootstrap-estimate (take n (repeatedly rand))))] + (double (/ (criterium.toolkit/elapsed-time + (-> (criterium.bench/measure + (f 1000000)) + (dissoc :expr-value))) + (double units/MILLISEC-NS)))) + + (def m (criterium.arg-gen/for-all + [v (clojure.test.check.generators/vector + (clojure.test.check.generators/double* {:inifinte? false :NaN? 
false + :min 0 :max 1}) + 1000000)] + (bootstrap-estimate v))) + + (dissoc (criterium.measure/measure m {}) :state)) + +(deftest bootstrap-stats-for-test + (testing "constant input" + (let [samples (mapv double (repeat 100 1)) + stats (bootstrap/bootstrap-stats-for + samples + {:estimate-quantiles [0.025 0.975] :quantiles [0.99]} + sampled-stats-test/identity-transforms) + result (bootstrap/->BcaEstimate + 1.0 + [{:value 1.0 :alpha 0.025} + {:value 1.0 :alpha 0.975}])] + (is (= result (-> stats :mean))) + (is (= result (-> stats :quantiles (get 0.25)))) + (is (= result (-> stats :quantiles (get 0.75)))) + (is (= result (-> stats :quantiles (get 0.99)))) + (is (= (bootstrap/->BcaEstimate + 0.0 + [{:value 0.0 :alpha 0.025} + {:value 0.0 :alpha 0.975}]) + (-> stats :variance))))) + + (testing "sequential input" + (let [samples (mapv double (range 101)) + stats (bootstrap/bootstrap-stats-for + samples + {:estimate-quantiles [0.025 0.975] :quantiles [0.99]} + sampled-stats-test/identity-transforms)] + (let [{m :point-estimate + [{l :value} {u :value}] :estimate-quantiles} + (-> stats :mean)] + (is (< l m u)) + (is (< l 50 u))) + (let [{m :point-estimate + [{l :value} {u :value}] :estimate-quantiles} + (-> stats :variance)] + (is (< l m u)) + (is (< l 858.5 u))))) + + (testing "reverse sequential input" + (let [samples (mapv double (reverse (range 101))) + stats (bootstrap/bootstrap-stats-for + samples + {:estimate-quantiles [0.025 0.975] :quantiles [0.99]} + sampled-stats-test/identity-transforms)] + (let [{m :point-estimate + [{l :value} {u :value}] :estimate-quantiles} + (-> stats :mean)] + (is (< l m u)) + (is (< l 50 u))) + (let [{m :point-estimate + [{l :value} {u :value}] :estimate-quantiles} + (-> stats :variance)] + (is (< l m u)) + (is (< l 858.5 u)))))) + +;; todo add helpers for constant samples +;; integration test of time with bootstrap +(defn sample-values + "Generate batched samples with the given mean and standard deviation." 
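+  ;; Worked example (follows from the body below): with batch-size 10,
+  ;; mean 10.0 and sigma 1.0 each returned sample is one random value scaled
+  ;; by the batch size, so samples cluster around 10 * 10.0 = 100.0, e.g.
+  ;;
+  ;;   (take 3 (sample-values 10 100 123 10.0 1.0))
+  ;;   ;; => three doubles near 100.0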
+ [batch-size num-samples random-seed mean sigma] + (let [batch-size (long batch-size) + values (->> (sampled-stats-test/random-values + random-seed mean sigma) + (take num-samples) + vec)] + (mapv #(* (double %) batch-size) values))) + +(deftest analyse-bootstrap-test + (let [batch-size 100 + num-samples 1000 + samples {[:v] (sample-values batch-size num-samples 123 10.0 1.0)} + metric-samples (assoc + (metrics-samples samples batch-size) + :metrics-defs + {:v + {:type :quantitative + :values [{:path [:v] + :type :quantitative + :dimension :time + :scale 1 + :label "v"}]}}) + result ((bootstrap/bootstrap-stats + {:quantiles [0.99] + :estimate-quantiles [0.025 0.975] + :bootstrap-size 100}) + {:samples metric-samples}) + point (have + (-> result + :bootstrap-stats + util/bootstrap + :v + :mean + :point-estimate))] + (is (test-max-error 10.0 point 0.1 "mean") + (str "Value: " point)))) diff --git a/bases/criterium/test/criterium/util/format_test.clj b/bases/criterium/test/criterium/util/format_test.clj new file mode 100644 index 0000000..bd22dd1 --- /dev/null +++ b/bases/criterium/test/criterium/util/format_test.clj @@ -0,0 +1,44 @@ +(ns criterium.util.format-test + (:require + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.util.format :as format])) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec format-non-dimensional-value-test + (prop/for-all [i gen/small-integer] + (= (str i) (format/format-value :non-dimensional i)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec format-time-value-ns-test + (prop/for-all [^double t (gen/double* {:max 1e-6 :min 0 :NaN? false})] + (= (format "%3.3g ns" (* t 1e9)) + (format/format-value :time t)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec format-time-value-us-test + (prop/for-all [^double t (gen/double* {:max 1e-3 :min 1e-6 :NaN? false})] + (= (format "%3.3g µs" (* t 1e6)) + (format/format-value :time t)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec format-time-value-ms-test + (prop/for-all [^double t (gen/double* + {:max (- 1 (Math/ulp 1.0)) :min 1e-3 :NaN? false})] + (= (format "%3.3g ms" (* t 1e3)) + (format/format-value :time t)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec format-time-value-s-test + (prop/for-all [t (gen/double* {:max 60 :min 1 :NaN? false})] + (= (format "%3.3g s" t) + (format/format-value :time t)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec format-time-value-min-test + (prop/for-all [^double t (gen/double* {:min (+ 60 (Math/ulp 60.0)) + :infinite? false + :NaN? false})] + (= (format "%3.3g min" (/ t 60.0)) + (format/format-value :time t)))) diff --git a/bases/criterium/test/criterium/util/forms_test.clj b/bases/criterium/test/criterium/util/forms_test.clj new file mode 100644 index 0000000..011e834 --- /dev/null +++ b/bases/criterium/test/criterium/util/forms_test.clj @@ -0,0 +1,62 @@ +(ns criterium.util.forms-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.util.forms :refer [cond*]])) + +(deftest cond*-tests + (testing "Basic conditional logic without let" + (is (= :a (cond* true :a))) + (is (= :b (cond* false :a true :b))) + (is (nil? 
(cond* false :a false :b)))) + + (testing "Single let binding" + (is (= 15 (cond* + :let [a 10] + true (+ a 5)))) + + (is (= 20 (cond* + false :skip + :let [b 15] + (> b 10) (+ b 5))))) + + (testing "Cascading let bindings" + (is (= 40 (cond* + :let [a 10] + :let [b (+ a 20)] + true (+ a b)))) + + (is (= "15 cats" (cond* + :let [x 5] + false :wrong + :let [y (+ x 10)] + (> y 12) (str y " cats") + :else :default)))) + + (testing "Else clause behavior" + (is (= 25 (cond* + :let [a 20] + false :wrong + :let [b 5] + :else (+ a b)))) + + (is (= :default (cond* + :let [x 10] + (< x 5) :impossible + :else :default)))) + + (testing "Error conditions" + (testing "Invalid :let usage" + (is (thrown? Exception (eval `(cond* :let "not a vector"))))) + + (testing "Odd number of clauses" + (is (thrown? Exception (eval `(cond* :let [~'a 1] true)))) + (is (thrown? Exception (eval `(cond* true :a false)))))) + + (testing "Edge cases" + (testing "Only let bindings" + (is (nil? (cond* :let [a 1] :let [b 2])))) + + (testing "Early return prevents later binding" + (is (= 10 (cond* + true 10 + :let [x (throw (Exception. "Shouldn't execute"))])))))) diff --git a/bases/criterium/test/criterium/util/helpers_test.clj b/bases/criterium/test/criterium/util/helpers_test.clj new file mode 100644 index 0000000..0040fb0 --- /dev/null +++ b/bases/criterium/test/criterium/util/helpers_test.clj @@ -0,0 +1,52 @@ +(ns criterium.util.helpers-test + (:require + [clojure.set :as set] + [clojure.test :refer [deftest is testing]] + [criterium.util.helpers :as util])) + +(defn- current-thread-priority + [] + (.getPriority (Thread/currentThread))) + +(deftest with-thread-priority-test + (testing "with-thread-priority" + (testing "when given a valid priority," + (let [original-priority (current-thread-priority) + priority (first (set/difference + #{Thread/MAX_PRIORITY Thread/MIN_PRIORITY} + #{original-priority}))] + (assert (not= original-priority priority)) + (testing "sets the priority for the body," + (util/with-thread-priority priority + (is (= priority (current-thread-priority)))) + (testing "and resets it on exit" + (is (= original-priority (current-thread-priority))))) + (testing "sets the priority for the body," + (try + (util/with-thread-priority priority + (is (= priority (current-thread-priority))) + (throw (ex-info "Intentional" {}))) + (catch clojure.lang.ExceptionInfo _)) + (testing "and resets it on exit when the body throws" + (is (= original-priority (current-thread-priority))))))) + (testing "when given :max-priority," + (testing "sets Thread/MAX_PRIORITY priority for the body," + (util/with-thread-priority :max-priority + (is (= Thread/MAX_PRIORITY (current-thread-priority)))))) + (testing "when given :min-priority," + (testing "sets Thread/MIN_PRIORITY priority for the body," + (util/with-thread-priority :min-priority + (is (= Thread/MIN_PRIORITY (current-thread-priority)))))) + (testing "when given a nil priority," + (let [p (current-thread-priority)] + (util/with-thread-priority nil + (testing "the thread priority is unchanged" + (is (= p (current-thread-priority))))))) + (testing "when given a non-integer priority, throws" + (is (thrown? clojure.lang.ExceptionInfo + (util/with-thread-priority 0.1)))) + (testing "when given an out of range priority, throws" + (is (thrown? clojure.lang.ExceptionInfo + (util/with-thread-priority (dec Thread/MIN_PRIORITY)))) + (is (thrown?
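+  ;; Usage sketch (with-thread-priority wraps a body, as in the tests above);
+  ;; the body here is hypothetical:
+  ;;
+  ;;   (util/with-thread-priority :max-priority
+  ;;     (run-benchmark))   ; priority is restored when the body exits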
clojure.lang.ExceptionInfo + (util/with-thread-priority (inc Thread/MAX_PRIORITY))))))) diff --git a/bases/criterium/test/criterium/util/invariant_test.clj b/bases/criterium/test/criterium/util/invariant_test.clj new file mode 100644 index 0000000..972ba91 --- /dev/null +++ b/bases/criterium/test/criterium/util/invariant_test.clj @@ -0,0 +1,48 @@ +(ns criterium.util.invariant-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.util.invariant :refer [have have?]])) + +(deftest have?-test + (testing "have?" + (testing "with a truthy predicate" + (is (true? (have? :data {:data 1})) + "returns true") + (is (true? (have? {:data 1})) + "returns true")) + (testing "with an untruthy predicate" + (is (thrown? + AssertionError + (have? :missing {:data 1})) + "throws")) + (testing "with no predicate and an untruthy value" + (is (thrown? + AssertionError + (have? nil)) + "throws") + (is (thrown? + AssertionError + (have? false)) + "throws")))) + +(deftest have-test + (testing "have" + (testing "with a truthy predicate" + (is (= {:data 1} (have :data {:data 1})) + "returns its argument") + (is (= {:data 1} (have {:data 1})) + "returns its argument")) + (testing "with an untruthy predicate" + (is (thrown? + AssertionError + (have :missing {:data 1})) + "throws")) + (testing "with no predicate and an untruthy value" + (is (thrown? + AssertionError + (have nil)) + "throws") + (is (thrown? + AssertionError + (have false)) + "throws")))) diff --git a/bases/criterium/test/criterium/util/probability_test.clj b/bases/criterium/test/criterium/util/probability_test.clj new file mode 100644 index 0000000..71cdffa --- /dev/null +++ b/bases/criterium/test/criterium/util/probability_test.clj @@ -0,0 +1,53 @@ +(ns criterium.util.probability-test + (:require + [clojure.test :refer [deftest is]] + [criterium.test-utils :refer [test-max-error]] + [criterium.util.probability :as probability])) + +;; Values from R, qnorm (with options(digits=15)) +(deftest normal-quantile-test + (is (pos? (probability/normal-quantile 0.5001))) + (is (neg? 
(probability/normal-quantile 0.4999))) + (is (< 2e-8 (- (probability/normal-quantile 0.999) + (probability/normal-quantile 0.001)))) + (let [max-error 1.0e-7] + (is (= 0.0 (probability/normal-quantile 0.5))) + (is (test-max-error + 1.2815515655446 + (probability/normal-quantile 0.9) + max-error)) + (is (test-max-error + 0.674489750196082 + (probability/normal-quantile 0.75) + max-error)) + (is (test-max-error + -1.03643338949379 + (probability/normal-quantile 0.15) + max-error)) + (is (test-max-error + -2.32634787404084 + (probability/normal-quantile 0.01) + max-error)))) + +;; Values from R, erf <- function(x) 2 * pnorm(x * sqrt(2)) - 1 +(deftest erf-test + (let [max-error 1.5e-7] + (test-max-error 0.999999984582742 (probability/erf 4.0) max-error) + (test-max-error 0.995322265018953 (probability/erf 2.0) max-error) + (test-max-error 0.842700792949715 (probability/erf 1.0) max-error) + (test-max-error 0.112462916018285 (probability/erf 0.1) max-error) + (test-max-error 0.0112834155558497 (probability/erf 0.01) max-error))) + +;; Values from R, pnorm +(deftest normal-cdf-test + (let [max-error 1.5e-7] + (test-max-error 0.99865010196837 (probability/normal-cdf 3.0) max-error) + (test-max-error 0.977249868051821 (probability/normal-cdf 2.0) max-error) + (test-max-error 0.841344746068543 (probability/normal-cdf 1.0) max-error) + (test-max-error 0.691462461274013 (probability/normal-cdf 0.5) max-error) + (test-max-error 0.5 (probability/normal-cdf 0.0) max-error) + (test-max-error 0.158655253931457 (probability/normal-cdf -1.0) max-error) + (test-max-error + 0.00134989803163009 + (probability/normal-cdf -3.0) + max-error))) diff --git a/bases/criterium/test/criterium/util/sampled_stats_test.clj b/bases/criterium/test/criterium/util/sampled_stats_test.clj new file mode 100644 index 0000000..2b534ab --- /dev/null +++ b/bases/criterium/test/criterium/util/sampled_stats_test.clj @@ -0,0 +1,195 @@ +(ns criterium.util.sampled-stats-test + (:require + [clojure.test :refer [deftest is testing]] + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.test-utils :refer [abs-error approx= gen-bounded test-max-error]] + [criterium.util.sampled-stats :as sampled-stats] + [criterium.util.stats :as stats] + [criterium.util.well :as well] + [criterium.util.ziggurat :as ziggurat])) + +(deftest pair-fn-test + (is (= [:a 15] ((sampled-stats/pair-fn :a (partial * 3)) 5)))) + +(deftest quantile-fns-test + (is (= {0.01 1 0.99 99} + (sampled-stats/sample-quantiles [0.01 0.99] (range 101))))) + +(deftest stats-fns-test + (is (= [[:mean 50.0] [:variance 858.5] [:min-val 0] [:max-val 100]] + (sampled-stats/stats-fns (range 101))))) + +(defn batch-transforms [^long batch-size] + {:sample-> (list (fn [^double v] (/ v batch-size))) + :->sample [(fn [^double v] (* v batch-size))]}) + +(def identity-transforms + {:sample-> (list identity) + :->sample [identity]}) + +(deftest stats-for-test + (let [samples (mapv double (repeat 100 1)) + stats (sampled-stats/stats-for + samples {:quantiles [0.05 0.95]})] + (is (= 1.0 (-> stats :mean))) + (is (= 0.0 (-> stats :variance)))) + + (testing "stats on [0..100]" + (let [samples (mapv double (range 101)) + stats (sampled-stats/stats-for + samples {:quantiles [0.05 0.95]})] + (is (= 50.0 (-> stats :mean))) + (is (= 858.5 (-> stats :variance))) + (is (= 0.0 (-> stats :min-val))) + (is (= 100.0 (-> stats :max-val))))) + + (testing "stats on (reverse [0..100])" + (let [samples (mapv double 
(reverse (range 101))) + stats (sampled-stats/stats-for + samples {:quantiles [0.05 0.95]})] + (is (= 50.0 (-> stats :mean))) + (is (= 858.5 (-> stats :variance))) + (is (= 0.0 (-> stats :min-val))) + (is (= 100.0 (-> stats :max-val))))) + + (testing "stats on [9 9 9 10 10 10]" + (let [samples (mapv double [9 9 9 10 10 10]) + stats (sampled-stats/stats-for + samples {:quantiles [0.05 0.95]})] + (is (= 9.5 (-> stats :mean))) + (test-max-error 0.3 (-> stats :variance) 1e-5) + (is (= 9.0 (-> stats :min-val))) + (is (= 10.0 (-> stats :max-val)))))) + +(deftest quantiles-for-test + (let [samples {[:v] (repeat 100 1)} + quantiles (sampled-stats/quantiles-for + [:v] samples {:quantiles [0.05 0.95]})] + (is (= {0.25 1.0, 0.5 1.0, 0.75 1.0, 0.05 1.0, 0.95 1.0} quantiles))) + + (testing "quantiles on [0..100]" + (let [samples {[:v] (range 101)} + quantiles (sampled-stats/quantiles-for + [:v] samples {:quantiles [0.05 0.95]})] + (is (= {0.25 25.0, 0.5 50.0, 0.75 75.0, 0.05 5.0, 0.95 95.0} quantiles)))) + + (testing "quantiles on (reverse [0..100])" + (let [samples {[:v] (reverse (range 101))} + quantiles (sampled-stats/quantiles-for + [:v] samples {:quantiles [0.05 0.95]})] + (is (= {0.25 25.0, 0.5 50.0, 0.75 75.0, 0.05 5.0, 0.95 95.0} + quantiles))))) + +(deftest stats-for-test-property-1 + (let [batch-size 5000 + num-samples 200 + values (take + (* batch-size num-samples) + (ziggurat/random-normal-zig + (well/well-rng-1024a))) + sample-vals (partition batch-size values) + samples (mapv #(stats/sum (mapv double %)) sample-vals) + stats (sampled-stats/stats-for + samples {:quantiles [0.05 0.95]}) + mean-hat (-> stats :mean) + variance-hat (-> stats :variance) + mean (stats/mean values) + variance (stats/variance values)] + (test-max-error (* mean batch-size) mean-hat 1e-5) + (is (approx= (* variance batch-size) variance-hat 2e-1)))) + +(defn random-values + "Return a sequence of values with the given mean and standard deviation." + [random-seed ^double mean ^double sigma] + (let [random-source (java.util.Random. random-seed)] + (->> #(.nextDouble random-source) + repeatedly + ziggurat/random-normal-zig + (map (fn [^double x] (+ mean (* sigma x))))))) + +(comment + #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} + (defspec random-values-test-property 10 + (prop/for-all + [random-seed gen/large-integer + mean (gen/double* {:min 0 :max 20 :infinite? false :NaN? false}) + sigma (gen/double* {:min 1 :max 1000 :infinite? false :NaN? false})] + (let [values (take 10000 (random-values random-seed mean sigma)) + variance (* sigma sigma) + mean-error (abs-error (stats/mean values) mean) + variance-error (abs-error (stats/variance values) variance) + mean-tol (max (* sigma 5e-2) 1e-2) + variance-tol (* variance 2e-1)] + (is (< mean-error mean-tol) "mean") + (is (< variance-error variance-tol) "variance") + (and (< mean-error mean-tol) + (< variance-error variance-tol)))))) + +(defn sample-values + "Generate batched samples with the given mean and standard deviation." + [batch-size num-samples random-seed mean sigma] + (let [values (->> (random-values random-seed mean sigma) + (take (* ^long batch-size ^long num-samples)) + vec) + sample-vals (partition batch-size values) + samples {[:v] (mapv #(stats/sum %) sample-vals)}] + {:samples samples + :values values})) + +(comment + #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} + (defspec sample-values-test-property 10 + (prop/for-all + [batch-size (gen-bounded 1 1) + random-seed gen/nat + mean (gen/double* {:min 0 :max 20 :infinite? false :NaN?
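+     ;; Note on the batching model exercised above: each recorded sample is
+     ;; the sum of batch-size independent values, so both the mean and the
+     ;; variance of the samples scale by batch-size, i.e. (approximately)
+     ;;
+     ;;   mean(samples)     = batch-size * mean(values)
+     ;;   variance(samples) = batch-size * variance(values)
+     ;;
+     ;; which is what stats-for-test-property-1 asserts.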
false}) + sigma (gen/double* {:min 1 :max 1000 :infinite? false :NaN? false})] + (let [num-samples 10000 + {:keys [values]} + (sample-values batch-size num-samples random-seed mean sigma) + mean-error (abs-error (stats/mean values) mean) + variance (* sigma sigma) + variance-error (abs-error (stats/variance values) variance) + mean-tol (max (* sigma 1e-1) 1e-2) + variance-tol (* variance 2e-1)] + (is (< mean-error mean-tol) "mean") + (is (< variance-error variance-tol) "variance") + (and (< mean-error mean-tol) + (< variance-error variance-tol)))))) + +(defn stats-values [batch-size num-samples random-seed mean sigma] + (let [{:keys [samples values]} (sample-values + batch-size num-samples random-seed mean sigma) + stats (sampled-stats/stats-for + (samples [:v]) + {:quantiles [0.05 0.95]}) + mean-hat (-> stats :mean) + variance-hat (-> stats :variance) + mean (stats/mean values) + variance (* (stats/variance values) 1)] + {:mean mean + :variance variance + :mean-hat mean-hat + :variance-hat variance-hat + :samples samples})) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec stats-for-test-property 10 + (prop/for-all + [^long batch-size (gen-bounded 1 1000) + random-seed gen/nat] + (let [num-samples (long (quot 10000 batch-size)) + mean 10 + sigma 3 + {:keys [^double mean ^double variance mean-hat variance-hat]} + (stats-values batch-size num-samples random-seed mean sigma) + mean-error (abs-error (* batch-size mean) mean-hat) + variance-error (abs-error (* batch-size variance) variance-hat) + mean-tol (max (* sigma 1e-1) 1e-2) + variance-tol (* ^double variance 5e-1)] + (is (< mean-error mean-tol)) + (is (< variance-error variance-tol)) + (and (< mean-error mean-tol) + (< variance-error variance-tol))))) diff --git a/bases/criterium/test/criterium/util/stats_test.clj b/bases/criterium/test/criterium/util/stats_test.clj new file mode 100644 index 0000000..9a2c184 --- /dev/null +++ b/bases/criterium/test/criterium/util/stats_test.clj @@ -0,0 +1,86 @@ +(ns criterium.util.stats-test + (:require + [clojure.test :refer [deftest is testing]] + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.test-utils :refer [test-max-error]] + [criterium.util.stats :as stats] + [criterium.util.well :as well])) + +(deftest mean-test + (is (= 1.0 (stats/mean (repeat 20 1)))) + (is (= 3.0 (stats/mean (range 0 7)))) + (is (= 50.0 (stats/mean (range 0 101))))) + +(deftest sum-test + (is (= 20 (stats/sum (take 20 (repeatedly (constantly 1)))))) + (is (= 21 (stats/sum (range 0 7))))) + +(deftest sum-of-squares-test + (is (= 20.0 (stats/sum-of-squares (take 20 (repeatedly (constantly 1)))))) + (is (= 80.0 (stats/sum-of-squares (take 20 (repeatedly (constantly 2)))))) + (is (= 91.0 (stats/sum-of-squares (range 0 7))))) + +(deftest variance-test + (is (= 0.0 (stats/variance (take 20 (repeatedly (constantly 1)))))) + (is (= 4.0 (stats/variance (range 0 7) 0))) + (is (= 850.0 (stats/variance (range 0 101) 0))) ; R: mean((y-mean(y))^2) + (is (= 858.5 (stats/variance (range 0 101) 1)))) ; R: var(y) + +(deftest median-test + ;; R: median(vs) + (is (= [5 [1 2] [7 8]] + (stats/median [1 2 5 7 8]))) + (is (= [3.5 [1 2 2] [5 7 8]] + (stats/median [1 2 2 5 7 8])))) + +(deftest quartiles-test + ;; R: quantile(vs, prob=c(.25,0.5,.75)) + (is (= [1.5 5 7.5] + (stats/quartiles [1 2 5 7 8]))) + (is (= [2 3.5 7] + (stats/quartiles [1 2 2 5 7 8])))) + +(deftest quantile-test + (testing "exact data points" + ;; R: 
quantile(c(1,2,5,7,8), prob=c(.25,0.5,.75)) + (is (= 2 (stats/quantile 0.25 [1 2 5 7 8]))) + (is (= 5 (stats/quantile 0.5 [1 2 5 7 8]))) + (is (= 7 (stats/quantile 0.75 [1 2 5 7 8])))) + (testing "interpolated data points" + ;; R: quantile(c(1,2,2,5,7,8), prob=c(.25,0.5,.75)) + (is (= 2.0 (stats/quantile 0.25 [1 2 2 5 7 8]))) + (is (= 3.5 (stats/quantile 0.5 [1 2 2 5 7 8]))) + (is (= 6.5 (stats/quantile 0.75 [1 2 2 5 7 8]))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec sample-uniform-test + (testing "sample-uniform" + (testing "returns values in [0..n)]" + (prop/for-all + [t gen/nat] + (every? + #(<= 0 % t) + (stats/sample-uniform 100 t (well/well-rng-1024a))))))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec sample-uniform-count-test + (testing "sample-uniform" + (testing "returns the correct number of samples" + (prop/for-all + [t gen/nat] + (= t + (count (stats/sample-uniform t 1 (well/well-rng-1024a)))))))) + +(deftest boxplot-outlier-thresholds-test + (is (= [-4.0 -1.0 7.0 10.0] (stats/boxplot-outlier-thresholds 2.0 4.0)))) + +(deftest quantiles-test + (let [max-error 1.5e-7] + (test-max-error 1.0 (stats/quantile 0.5 [0 1 2]) max-error) + (test-max-error 1.5 (stats/quantile 0.5 [0 1 2 3]) max-error) + (test-max-error 1.0 (stats/quantile 0.25 [0 1 1.5 2 3]) max-error) + (test-max-error 2.0 (stats/quantile 0.75 [0 1 1.5 2 3]) max-error)) + (is (= 5 (stats/quantile 0.05 (range 0 101)))) + (is (= 95 (stats/quantile 0.95 (range 0 101))))) diff --git a/bases/criterium/test/criterium/util/t_digest/MergingDigest.java b/bases/criterium/test/criterium/util/t_digest/MergingDigest.java new file mode 100644 index 0000000..03748e2 --- /dev/null +++ b/bases/criterium/test/criterium/util/t_digest/MergingDigest.java @@ -0,0 +1,946 @@ +/* + * Licensed to Ted Dunning under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tdunning.math.stats; + +import java.nio.ByteBuffer; +import java.util.AbstractCollection; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * Maintains a t-digest by collecting new points in a buffer that is then sorted occasionally and merged + * into a sorted array that contains previously computed centroids. + *

+ * This can be very fast because the cost of sorting and merging is amortized over several insertion. If
+ * we keep N centroids total and have the input array is k long, then the amortized cost is something like
+ *
+ * N/k + log k
+ *
+ * These costs even out when N/k = log k. Balancing costs is often a good place to start in optimizing an
+ * algorithm. For different values of compression factor, the following table shows estimated asymptotic
+ * values of N and suggested values of k:
+ *
+ * Sizing considerations for t-digest:
+ *   Compression    N      k
+ *   50             78     25
+ *   100            157    42
+ *   200            314    73
+ *
+ * The virtues of this kind of t-digest implementation include:
+ *
+ *   - No allocation is required after initialization
+ *   - The data structure automatically compresses existing centroids when possible
+ *   - No Java object overhead is incurred for centroids since data is kept in primitive arrays
+ *
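+ * For example, at a compression factor of 100 the table above gives N = 157 and k = 42,
+ * so N/k is about 3.7 and log k is also about 3.7; the two cost terms balance, which is
+ * the sense in which these values of k are suggested.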

+ * The current implementation takes the liberty of using ping-pong buffers for implementing the merge resulting + * in a substantial memory penalty, but the complexity of an in place merge was not considered as worthwhile + * since even with the overhead, the memory cost is less than 40 bytes per centroid which is much less than half + * what the AVLTreeDigest uses and no dynamic allocation is required at all. + */ +public class MergingDigest extends AbstractTDigest { + private int mergeCount = 0; + + private final double publicCompression; + private final double compression; + + // points to the first unused centroid + private int lastUsedCell; + + // sum_i weight[i] See also unmergedWeight + private double totalWeight = 0; + + // number of points that have been added to each merged centroid + private final double[] weight; + // mean of points added to each merged centroid + private final double[] mean; + + // history of all data added to centroids (for testing purposes) + private List> data = null; + + // sum_i tempWeight[i] + private double unmergedWeight = 0; + + // this is the index of the next temporary centroid + // this is a more Java-like convention than lastUsedCell uses + private int tempUsed = 0; + private final double[] tempWeight; + private final double[] tempMean; + private List> tempData = null; + + + // array used for sorting the temp centroids. This is a field + // to avoid allocations during operation + private final int[] order; + + // if true, alternate upward and downward merge passes + public boolean useAlternatingSort = true; + // if true, use higher working value of compression during construction, then reduce on presentation + public boolean useTwoLevelCompression = true; + + // this forces centroid merging based on size limit rather than + // based on accumulated k-index. This can be much faster since we + // scale functions are more expensive than the corresponding + // weight limits. + public static boolean useWeightLimit = true; + + /** + * Allocates a buffer merging t-digest. This is the normally used constructor that + * allocates default sized internal arrays. Other versions are available, but should + * only be used for special cases. + * + * @param compression The compression factor + */ + @SuppressWarnings("WeakerAccess") + public MergingDigest(double compression) { + this(compression, -1); + } + + /** + * If you know the size of the temporary buffer for incoming points, you can use this entry point. + * + * @param compression Compression factor for t-digest. Same as 1/\delta in the paper. + * @param bufferSize How many samples to retain before merging. + */ + @SuppressWarnings("WeakerAccess") + public MergingDigest(double compression, int bufferSize) { + // we can guarantee that we only need ceiling(compression). + this(compression, bufferSize, -1); + } + + /** + * Fully specified constructor. Normally only used for deserializing a buffer t-digest. + * + * @param compression Compression factor + * @param bufferSize Number of temporary centroids + * @param size Size of main buffer + */ + @SuppressWarnings("WeakerAccess") + public MergingDigest(double compression, int bufferSize, int size) { + // ensure compression >= 10 + // default size = 2 * ceil(compression) + // default bufferSize = 5 * size + // scale = max(2, bufferSize / size - 1) + // compression, publicCompression = sqrt(scale-1)*compression, compression + // ensure size > 2 * compression + weightLimitFudge + // ensure bufferSize > 2*size + + // force reasonable value. 
Anything less than 10 doesn't make much sense because + // too few centroids are retained + if (compression < 10) { + compression = 10; + } + + // the weight limit is too conservative about sizes and can require a bit of extra room + double sizeFudge = 0; + if (useWeightLimit) { + sizeFudge = 10; + if (compression < 30) sizeFudge += 20; + } + + // default size + size = (int) Math.max(2 * compression + sizeFudge, size); + + // default buffer + if (bufferSize == -1) { + // TODO update with current numbers + // having a big buffer is good for speed + // experiments show bufferSize = 1 gives half the performance of bufferSize=10 + // bufferSize = 2 gives 40% worse performance than 10 + // but bufferSize = 5 only costs about 5-10% + // + // compression factor time(us) + // 50 1 0.275799 + // 50 2 0.151368 + // 50 5 0.108856 + // 50 10 0.102530 + // 100 1 0.215121 + // 100 2 0.142743 + // 100 5 0.112278 + // 100 10 0.107753 + // 200 1 0.210972 + // 200 2 0.148613 + // 200 5 0.118220 + // 200 10 0.112970 + // 500 1 0.219469 + // 500 2 0.158364 + // 500 5 0.127552 + // 500 10 0.121505 + bufferSize = 5 * size; + } + + // ensure enough space in buffer + if (bufferSize <= 2 * size) { + bufferSize = 2 * size; + } + + // scale is the ratio of extra buffer to the final size + // we have to account for the fact that we copy all live centroids into the incoming space + double scale = Math.max(1, bufferSize / size - 1); + //noinspection ConstantConditions + if (!useTwoLevelCompression) { + scale = 1; + } + + // publicCompression is how many centroids the user asked for + // compression is how many we actually keep + this.publicCompression = compression; + this.compression = Math.sqrt(scale) * publicCompression; + + // changing the compression could cause buffers to be too small, readjust if so + if (size < this.compression + sizeFudge) { + size = (int) Math.ceil(this.compression + sizeFudge); + } + + // ensure enough space in buffer (possibly again) + if (bufferSize <= 2 * size) { + bufferSize = 2 * size; + } + + weight = new double[size]; + mean = new double[size]; + + tempWeight = new double[bufferSize]; + tempMean = new double[bufferSize]; + order = new int[bufferSize]; + + lastUsedCell = 0; + } + + /** + * Turns on internal data recording. 
+ */ + @Override + public TDigest recordAllData() { + super.recordAllData(); + data = new ArrayList<>(); + tempData = new ArrayList<>(); + return this; + } + + @Override + void add(double x, int w, Centroid base) { + add(x, w, base.data()); + } + + @Override + public void add(double x, int w) { + add(x, w, (List) null); + } + + private void add(double x, int w, List history) { + if (Double.isNaN(x)) { + throw new IllegalArgumentException("Cannot add NaN to t-digest"); + } + if (tempUsed >= tempWeight.length - lastUsedCell - 1) { + mergeNewValues(); + } + int where = tempUsed++; + tempWeight[where] = w; + tempMean[where] = x; + unmergedWeight += w; + if (x < min) { + min = x; + } + if (x > max) { + max = x; + } + + if (data != null) { + if (tempData == null) { + tempData = new ArrayList<>(); + } + while (tempData.size() <= where) { + tempData.add(new ArrayList()); + } + if (history == null) { + history = Collections.singletonList(x); + } + tempData.get(where).addAll(history); + } + } + + private void add(double[] m, double[] w, int count, List> data) { + if (m.length != w.length) { + throw new IllegalArgumentException("Arrays not same length"); + } + if (m.length < count + lastUsedCell) { + // make room to add existing centroids + double[] m1 = new double[count + lastUsedCell]; + System.arraycopy(m, 0, m1, 0, count); + m = m1; + double[] w1 = new double[count + lastUsedCell]; + System.arraycopy(w, 0, w1, 0, count); + w = w1; + } + double total = 0; + for (int i = 0; i < count; i++) { + total += w[i]; + } + merge(m, w, count, data, null, total, false, compression); + } + + @Override + public void add(List others) { + if (others.size() == 0) { + return; + } + int size = 0; + for (TDigest other : others) { + other.compress(); + size += other.centroidCount(); + } + + double[] m = new double[size]; + double[] w = new double[size]; + List> data; + if (recordAllData) { + data = new ArrayList<>(); + } else { + data = null; + } + int offset = 0; + for (TDigest other : others) { + if (other instanceof MergingDigest) { + MergingDigest md = (MergingDigest) other; + System.arraycopy(md.mean, 0, m, offset, md.lastUsedCell); + System.arraycopy(md.weight, 0, w, offset, md.lastUsedCell); + if (data != null) { + for (Centroid centroid : other.centroids()) { + data.add(centroid.data()); + } + } + offset += md.lastUsedCell; + } else { + for (Centroid centroid : other.centroids()) { + m[offset] = centroid.mean(); + w[offset] = centroid.count(); + if (recordAllData) { + assert data != null; + data.add(centroid.data()); + } + offset++; + } + } + } + add(m, w, size, data); + } + + private void mergeNewValues() { + mergeNewValues(false, compression); + } + + private void mergeNewValues(boolean force, double compression) { + if (totalWeight == 0 && unmergedWeight == 0) { + // seriously nothing to do + return; + } + if (force || unmergedWeight > 0) { + // note that we run the merge in reverse every other merge to avoid left-to-right bias in merging + merge(tempMean, tempWeight, tempUsed, tempData, order, unmergedWeight, + useAlternatingSort & mergeCount % 2 == 1, compression); + mergeCount++; + tempUsed = 0; + unmergedWeight = 0; + if (data != null) { + tempData = new ArrayList<>(); + } + } + } + + private void merge(double[] incomingMean, double[] incomingWeight, int incomingCount, + List> incomingData, int[] incomingOrder, + double unmergedWeight, boolean runBackwards, double compression) { + // when our incoming buffer fills up, we combine our existing centroids with the incoming data, + // and then reduce the 
centroids by merging if possible + assert lastUsedCell <= 0 || weight[0] == 1; + assert lastUsedCell <= 0 || weight[lastUsedCell - 1] == 1; + System.arraycopy(mean, 0, incomingMean, incomingCount, lastUsedCell); + + System.arraycopy(weight, 0, incomingWeight, incomingCount, lastUsedCell); + incomingCount += lastUsedCell; + + if (incomingData != null) { + for (int i = 0; i < lastUsedCell; i++) { + assert data != null; + incomingData.add(data.get(i)); + } + data = new ArrayList<>(); + } + if (incomingOrder == null) { + incomingOrder = new int[incomingCount]; + } + Sort.stableSort(incomingOrder, incomingMean, incomingCount); + + totalWeight += unmergedWeight; + + // option to run backwards is to help investigate bias in errors + if (runBackwards) { + Sort.reverse(incomingOrder, 0, incomingCount); + } + + + // start by copying the least incoming value to the normal buffer + lastUsedCell = 0; + mean[lastUsedCell] = incomingMean[incomingOrder[0]]; + weight[lastUsedCell] = incomingWeight[incomingOrder[0]]; + double wSoFar = 0; + if (data != null) { + assert incomingData != null; + data.add(incomingData.get(incomingOrder[0])); + } + + // weight will contain all zeros after this loop + + double normalizer = scale.normalizer(compression, totalWeight); + double k1 = scale.k(0, normalizer); + double wLimit = totalWeight * scale.q(k1 + 1, normalizer); + for (int i = 1; i < incomingCount; i++) { + int ix = incomingOrder[i]; + double proposedWeight = weight[lastUsedCell] + incomingWeight[ix]; + double projectedW = wSoFar + proposedWeight; + boolean addThis; + if (useWeightLimit) { + double q0 = wSoFar / totalWeight; + double q2 = (wSoFar + proposedWeight) / totalWeight; + addThis = proposedWeight <= totalWeight * Math.min(scale.max(q0, normalizer), scale.max(q2, normalizer)); + } else { + addThis = projectedW <= wLimit; + } + if (i == 1 || i == incomingCount - 1) { + // force last centroid to never merge + addThis = false; + } + + if (addThis) { + // next point will fit + // so merge into existing centroid + weight[lastUsedCell] += incomingWeight[ix]; + mean[lastUsedCell] = mean[lastUsedCell] + (incomingMean[ix] - mean[lastUsedCell]) * incomingWeight[ix] / weight[lastUsedCell]; + incomingWeight[ix] = 0; + + if (data != null) { + while (data.size() <= lastUsedCell) { + data.add(new ArrayList()); + } + assert incomingData != null; + assert data.get(lastUsedCell) != incomingData.get(ix); + data.get(lastUsedCell).addAll(incomingData.get(ix)); + } + } else { + // didn't fit ... move to next output, copy out first centroid + wSoFar += weight[lastUsedCell]; + if (!useWeightLimit) { + k1 = scale.k(wSoFar / totalWeight, normalizer); + wLimit = totalWeight * scale.q(k1 + 1, normalizer); + } + + lastUsedCell++; + mean[lastUsedCell] = incomingMean[ix]; + weight[lastUsedCell] = incomingWeight[ix]; + incomingWeight[ix] = 0; + + if (data != null) { + assert incomingData != null; + assert data.size() == lastUsedCell; + data.add(incomingData.get(ix)); + } + } + } + // points to next empty cell + lastUsedCell++; + + // sanity check + double sum = 0; + for (int i = 0; i < lastUsedCell; i++) { + sum += weight[i]; + } + assert sum == totalWeight; + if (runBackwards) { + Sort.reverse(mean, 0, lastUsedCell); + Sort.reverse(weight, 0, lastUsedCell); + if (data != null) { + Collections.reverse(data); + } + } + assert weight[0] == 1; + assert weight[lastUsedCell - 1] == 1; + + if (totalWeight > 0) { + min = Math.min(min, mean[0]); + max = Math.max(max, mean[lastUsedCell - 1]); + } + } + + /** + * Exposed for testing. 
+ */ + int checkWeights() { + return checkWeights(weight, totalWeight, lastUsedCell); + } + + private int checkWeights(double[] w, double total, int last) { + int badCount = 0; + + int n = last; + if (w[n] > 0) { + n++; + } + + double normalizer = scale.normalizer(publicCompression, totalWeight); + double k1 = scale.k(0, normalizer); + double q = 0; + double left = 0; + String header = "\n"; + for (int i = 0; i < n; i++) { + double dq = w[i] / total; + double k2 = scale.k(q + dq, normalizer); + q += dq / 2; + if (k2 - k1 > 1 && w[i] != 1) { + System.out.printf("%sOversize centroid at " + + "%d, k0=%.2f, k1=%.2f, dk=%.2f, w=%.2f, q=%.4f, dq=%.4f, left=%.1f, current=%.2f maxw=%.2f\n", + header, i, k1, k2, k2 - k1, w[i], q, dq, left, w[i], totalWeight * scale.max(q, normalizer)); + header = ""; + badCount++; + } + if (k2 - k1 > 4 && w[i] != 1) { + throw new IllegalStateException( + String.format("Egregiously oversized centroid at " + + "%d, k0=%.2f, k1=%.2f, dk=%.2f, w=%.2f, q=%.4f, dq=%.4f, left=%.1f, current=%.2f, maxw=%.2f\n", + i, k1, k2, k2 - k1, w[i], q, dq, left, w[i], totalWeight * scale.max(q, normalizer))); + } + q += dq / 2; + left += w[i]; + k1 = k2; + } + + return badCount; + } + + /** + * Merges any pending inputs and compresses the data down to the public setting. + * Note that this typically loses a bit of precision and thus isn't a thing to + * be doing all the time. It is best done only when we want to show results to + * the outside world. + */ + @Override + public void compress() { + mergeNewValues(true, publicCompression); + } + + @Override + public long size() { + return (long) (totalWeight + unmergedWeight); + } + + @Override + public double cdf(double x) { + if (Double.isNaN(x) || Double.isInfinite(x)) { + throw new IllegalArgumentException(String.format("Invalid value: %f", x)); + } + mergeNewValues(); + + if (lastUsedCell == 0) { + // no data to examine + return Double.NaN; + } else if (lastUsedCell == 1) { + // exactly one centroid, should have max==min + double width = max - min; + if (x < min) { + return 0; + } else if (x > max) { + return 1; + } else if (x - min <= width) { + // min and max are too close together to do any viable interpolation + return 0.5; + } else { + // interpolate if somehow we have weight > 0 and max != min + return (x - min) / (max - min); + } + } else { + int n = lastUsedCell; + if (x < min) { + return 0; + } + + if (x > max) { + return 1; + } + + // check for the left tail + if (x < mean[0]) { + // note that this is different than mean[0] > min + // ... 
this guarantees we divide by non-zero number and interpolation works + if (mean[0] - min > 0) { + // must be a sample exactly at min + if (x == min) { + return 0.5 / totalWeight; + } else { + return (1 + (x - min) / (mean[0] - min) * (weight[0] / 2 - 1)) / totalWeight; + } + } else { + // this should be redundant with the check x < min + return 0; + } + } + assert x >= mean[0]; + + // and the right tail + if (x > mean[n - 1]) { + if (max - mean[n - 1] > 0) { + if (x == max) { + return 1 - 0.5 / totalWeight; + } else { + // there has to be a single sample exactly at max + double dq = (1 + (max - x) / (max - mean[n - 1]) * (weight[n - 1] / 2 - 1)) / totalWeight; + return 1 - dq; + } + } else { + return 1; + } + } + + // we know that there are at least two centroids and mean[0] < x < mean[n-1] + // that means that there are either one or more consecutive centroids all at exactly x + // or there are consecutive centroids, c0 < x < c1 + double weightSoFar = 0; + for (int it = 0; it < n - 1; it++) { + // weightSoFar does not include weight[it] yet + if (mean[it] == x) { + // we have one or more centroids == x, treat them as one + // dw will accumulate the weight of all of the centroids at x + double dw = 0; + while (it < n && mean[it] == x) { + dw += weight[it]; + it++; + } + return (weightSoFar + dw / 2) / totalWeight; + } else if (mean[it] <= x && x < mean[it + 1]) { + // landed between centroids ... check for floating point madness + if (mean[it + 1] - mean[it] > 0) { + // note how we handle singleton centroids here + // the point is that for singleton centroids, we know that their entire + // weight is exactly at the centroid and thus shouldn't be involved in + // interpolation + double leftExcludedW = 0; + double rightExcludedW = 0; + if (weight[it] == 1) { + if (weight[it + 1] == 1) { + // two singletons means no interpolation + // left singleton is in, right is out + return (weightSoFar + 1) / totalWeight; + } else { + leftExcludedW = 0.5; + } + } else if (weight[it + 1] == 1) { + rightExcludedW = 0.5; + } + double dw = (weight[it] + weight[it + 1]) / 2; + + // can't have double singleton (handled that earlier) + assert dw > 1; + assert (leftExcludedW + rightExcludedW) <= 0.5; + + // adjust endpoints for any singleton + double left = mean[it]; + double right = mean[it + 1]; + + double dwNoSingleton = dw - leftExcludedW - rightExcludedW; + + // adjustments have only limited effect on endpoints + assert dwNoSingleton > dw / 2; + assert right - left > 0; + double base = weightSoFar + weight[it] / 2 + leftExcludedW; + return (base + dwNoSingleton * (x - left) / (right - left)) / totalWeight; + } else { + // this is simply caution against floating point madness + // it is conceivable that the centroids will be different + // but too near to allow safe interpolation + double dw = (weight[it] + weight[it + 1]) / 2; + return (weightSoFar + dw) / totalWeight; + } + } else { + weightSoFar += weight[it]; + } + } + if (x == mean[n - 1]) { + return 1 - 0.5 / totalWeight; + } else { + throw new IllegalStateException("Can't happen ... 
loop fell through"); + } + } + } + + @Override + public double quantile(double q) { + if (q < 0 || q > 1) { + throw new IllegalArgumentException("q should be in [0,1], got " + q); + } + mergeNewValues(); + + if (lastUsedCell == 0) { + // no centroids means no data, no way to get a quantile + return Double.NaN; + } else if (lastUsedCell == 1) { + // with one data point, all quantiles lead to Rome + return mean[0]; + } + + // we know that there are at least two centroids now + int n = lastUsedCell; + + // if values were stored in a sorted array, index would be the offset we are interested in + final double index = q * totalWeight; + + // beyond the boundaries, we return min or max + // usually, the first centroid will have unit weight so this will make it moot + if (index < 1) { + return min; + } + + // if the left centroid has more than one sample, we still know + // that one sample occurred at min so we can do some interpolation + if (weight[0] > 1 && index < weight[0] / 2) { + // there is a single sample at min so we interpolate with less weight + return min + (index - 1) / (weight[0] / 2 - 1) * (mean[0] - min); + } + + // usually the last centroid will have unit weight so this test will make it moot + if (index > totalWeight - 1) { + return max; + } + + // if the right-most centroid has more than one sample, we still know + // that one sample occurred at max so we can do some interpolation + if (weight[n - 1] > 1 && totalWeight - index <= weight[n - 1] / 2) { + return max - (totalWeight - index - 1) / (weight[n - 1] / 2 - 1) * (max - mean[n - 1]); + } + + // in between extremes we interpolate between centroids + double weightSoFar = weight[0] / 2; + for (int i = 0; i < n - 1; i++) { + double dw = (weight[i] + weight[i + 1]) / 2; + if (weightSoFar + dw > index) { + // centroids i and i+1 bracket our current point + + // check for unit weight + double leftUnit = 0; + if (weight[i] == 1) { + if (index - weightSoFar < 0.5) { + // within the singleton's sphere + return mean[i]; + } else { + leftUnit = 0.5; + } + } + double rightUnit = 0; + if (weight[i + 1] == 1) { + if (weightSoFar + dw - index <= 0.5) { + // no interpolation needed near singleton + return mean[i + 1]; + } + rightUnit = 0.5; + } + double z1 = index - weightSoFar - leftUnit; + double z2 = weightSoFar + dw - index - rightUnit; + return weightedAverage(mean[i], z2, mean[i + 1], z1); + } + weightSoFar += dw; + } + // we handled singleton at end up above + assert weight[n - 1] > 1; + assert index <= totalWeight; + assert index >= totalWeight - weight[n - 1] / 2; + + // weightSoFar = totalWeight - weight[n-1]/2 (very nearly) + // so we interpolate out to max value ever seen + double z1 = index - totalWeight - weight[n - 1] / 2.0; + double z2 = weight[n - 1] / 2 - z1; + return weightedAverage(mean[n - 1], z1, max, z2); + } + + @Override + public int centroidCount() { + mergeNewValues(); + return lastUsedCell; + } + + @Override + public Collection centroids() { + // we don't actually keep centroid structures around so we have to fake it + compress(); + return new AbstractCollection() { + @Override + public Iterator iterator() { + return new Iterator() { + int i = 0; + + @Override + public boolean hasNext() { + return i < lastUsedCell; + } + + @Override + public Centroid next() { + Centroid rc = new Centroid(mean[i], (int) weight[i], data != null ? 
data.get(i) : null); + i++; + return rc; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Default operation"); + } + }; + } + + @Override + public int size() { + return lastUsedCell; + } + }; + } + + @Override + public double compression() { + return publicCompression; + } + + @Override + public int byteSize() { + compress(); + // format code, compression(float), buffer-size(int), temp-size(int), #centroids-1(int), + // then two doubles per centroid + return lastUsedCell * 16 + 32; + } + + @Override + public int smallByteSize() { + compress(); + // format code(int), compression(float), buffer-size(short), temp-size(short), #centroids-1(short), + // then two floats per centroid + return lastUsedCell * 8 + 30; + } + + @SuppressWarnings("WeakerAccess") + public ScaleFunction getScaleFunction() { + return scale; + } + + @Override + public void setScaleFunction(ScaleFunction scaleFunction) { + super.setScaleFunction(scaleFunction); + } + + public enum Encoding { + VERBOSE_ENCODING(1), SMALL_ENCODING(2); + + private final int code; + + Encoding(int code) { + this.code = code; + } + } + + @Override + public void asBytes(ByteBuffer buf) { + compress(); + buf.putInt(Encoding.VERBOSE_ENCODING.code); + buf.putDouble(min); + buf.putDouble(max); + buf.putDouble(publicCompression); + buf.putInt(lastUsedCell); + for (int i = 0; i < lastUsedCell; i++) { + buf.putDouble(weight[i]); + buf.putDouble(mean[i]); + } + } + + @Override + public void asSmallBytes(ByteBuffer buf) { + compress(); + buf.putInt(Encoding.SMALL_ENCODING.code); // 4 + buf.putDouble(min); // + 8 + buf.putDouble(max); // + 8 + buf.putFloat((float) publicCompression); // + 4 + buf.putShort((short) mean.length); // + 2 + buf.putShort((short) tempMean.length); // + 2 + buf.putShort((short) lastUsedCell); // + 2 = 30 + for (int i = 0; i < lastUsedCell; i++) { + buf.putFloat((float) weight[i]); + buf.putFloat((float) mean[i]); + } + } + + @SuppressWarnings("WeakerAccess") + public static MergingDigest fromBytes(ByteBuffer buf) { + int encoding = buf.getInt(); + if (encoding == Encoding.VERBOSE_ENCODING.code) { + double min = buf.getDouble(); + double max = buf.getDouble(); + double compression = buf.getDouble(); + int n = buf.getInt(); + MergingDigest r = new MergingDigest(compression); + r.setMinMax(min, max); + r.lastUsedCell = n; + for (int i = 0; i < n; i++) { + r.weight[i] = buf.getDouble(); + r.mean[i] = buf.getDouble(); + + r.totalWeight += r.weight[i]; + } + return r; + } else if (encoding == Encoding.SMALL_ENCODING.code) { + double min = buf.getDouble(); + double max = buf.getDouble(); + double compression = buf.getFloat(); + int n = buf.getShort(); + int bufferSize = buf.getShort(); + MergingDigest r = new MergingDigest(compression, bufferSize, n); + r.setMinMax(min, max); + r.lastUsedCell = buf.getShort(); + for (int i = 0; i < r.lastUsedCell; i++) { + r.weight[i] = buf.getFloat(); + r.mean[i] = buf.getFloat(); + + r.totalWeight += r.weight[i]; + } + return r; + } else { + throw new IllegalStateException("Invalid format for serialized histogram"); + } + + } + + @Override + public String toString() { + return "MergingDigest" + + "-" + getScaleFunction() + + "-" + (useWeightLimit ? "weight" : "kSize") + + "-" + (useAlternatingSort ? "alternating" : "stable") + + "-" + (useTwoLevelCompression ? 
"twoLevel" : "oneLevel"); + } +} diff --git a/bases/criterium/test/criterium/util/t_digest/merging_digest_test.clj b/bases/criterium/test/criterium/util/t_digest/merging_digest_test.clj new file mode 100644 index 0000000..411383d --- /dev/null +++ b/bases/criterium/test/criterium/util/t_digest/merging_digest_test.clj @@ -0,0 +1,421 @@ +(ns criterium.util.t-digest.merging-digest-test + "Tests for t-digest implementation" + (:require + [clojure.test :refer [deftest is testing]] + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.test-utils :refer [approx= gen-double]] + [criterium.util.probability :as probability] + [criterium.util.t-digest.merging-digest :as md] + [criterium.util.t-digest.scale :as scale] + [criterium.util.well :as well] + [criterium.util.ziggurat :as ziggurat])) + +#_(deftest merge-centroids-invariants + (let [compression 100.0] + (testing "merge-centroids maintains t-digest invariants" + (let [centroids [(td/->Centroid 1.0 1.0) + (td/->Centroid 1.1 1.0) + (td/->Centroid 2.0 1.0) + (td/->Centroid 3.0 1.0)] + total-weight 4.0 + result (#'td/merge-centroids compression centroids total-weight) + _ (println "\nResult centroids:" result) + weights (reductions + (map :weight result)) + _ (println "Cumulative weights:" weights) + qs (map #(/ % total-weight) weights) + _ (println "Quantiles:" qs) + k-values (map #(#'td/k1 % compression) qs) + _ (println "K values:" k-values) + k-gaps (map - (rest k-values) k-values) + _ (println "K gaps:" k-gaps)] + + ;; Basic invariants + (is (< (count result) (count centroids))) + (is (= total-weight (reduce + (map :weight result)))) + (is (= (map :mean result) (sort (map :mean result)))) + + ;; k1 separation invariant + (is (every? 
#(>= % 1.0) k-gaps)))))) + +(def gen-centroid + (gen/fmap (fn [m] (md/->Centroid m 1.0)) + (gen-double {:min -1000 :max 1000}) )) + +(defspec merge-centroids-centroids-test + (prop/for-all [centroids (gen/vector gen-centroid 1 100) + compression (gen-double {:min 1.0 :max 1000.0})] + (let [sorted (sort-by :mean centroids) + total-weight (reduce + (map :weight sorted)) + merged (#'md/merge-centroids + compression + sorted + total-weight + scale/k0)] + (testing "Number of centroids should not increase" + (>= (count sorted) (count merged)))))) + +(defspec merge-centroids-total-weight-test + (prop/for-all [centroids (gen/vector gen-centroid 1 100) + compression (gen-double {:min 1.0 :max 1000.0})] + (let [sorted (sort-by :mean centroids) + total-weight (reduce + (map :weight sorted)) + merged (#'md/merge-centroids + compression + sorted + total-weight + scale/k0)] + (testing "Total weight should be preserved" + (approx= total-weight (reduce + (map :weight merged))))))) + +(defspec merge-centroids-means-ordered-test + (prop/for-all [centroids (gen/vector gen-centroid 1 100) + compression (gen-double {:min 1.0 :max 1000.0})] + (let [sorted (sort-by :mean centroids) + total-weight (reduce + (map :weight sorted)) + merged (#'md/merge-centroids + compression + sorted + total-weight + scale/k0)] + (testing "Means should remain ordered" + (when (not= (map :mean merged) (sort (map :mean merged))) + (prn (map :mean merged) (sort (map :mean merged)))) + (= (map :mean merged) (sort (map :mean merged))))))) + +#_(defspec merge-centroids-separation-test + (prop/for-all [centroids (gen/vector gen-centroid 5 100) + compression (gen-double {:min 1.0 :max 1000.0})] + (let [sorted (sort-by :mean centroids) + total-weight (reduce + (map :weight sorted)) + merged (#'md/merge-centroids + compression + sorted + total-weight + scale/k2)] + (testing "Check separation between centroids meets k1 requirements" + (let [qs (reductions + (map #(/ (:weight %) total-weight) merged)) + normalizer (scale/normalizer scale/k0 compression total-weight) + k-values (map #(scale/k scale/k0 % normalizer total-weight) qs) + k-gaps (map - (rest k-values) k-values)] + (when (not (every? #(>= % 1.0) k-gaps)) + (prn :k-values k-values :k-gaps k-gaps)) + (every? #(>= % 1.0) k-gaps)))))) + +(deftest quantile-edge-cases + (testing "quantile edge cases" + (let [d (md/compress + (reduce + md/add-point + (md/new-digest) + [1.0 2.0 3.0]))] + (is (= 1.0 (md/quantile d 0.0))) + (is (= 3.0 (md/quantile d 1.0))) + (is (some? (md/quantile d 0.5)))))) + +(deftest buffering-operations + (testing "buffer behavior" + (let [d (md/new-digest 100.0 3)] + (testing "points are buffered" + (let [d (-> d + (md/add-point 1.0) + (md/add-point 2.0))] + (is (= 2 (count (:temp-centroids d)))) + (is (= 0 (count (:centroids d)))) + (is (= 2.0 (:unmerged-weight d))) + (is (= 0.0 (:total-weight d))))) + + (testing "buffer merges when full" + (let [d (-> d + (md/add-point 1.0) + (md/add-point 2.0) + (md/add-point 3.0) + (md/add-point 4.0))] + (is (= 1 (count (:temp-centroids d)))) + (is (= 3 (count (:centroids d)))) + (is (= 1.0 (:unmerged-weight d))) + (is (= 3.0 (:total-weight d))))) + + (testing "compress merges partial buffer" + (let [d (-> d + (md/add-point 1.0) + (md/add-point 2.0) + md/compress)] + (is (empty? (:temp-centroids d))) + (is (= 2 (count (:centroids d)))) + (is (= 0.0 (:unmerged-weight d))) + (is (= 2.0 (:total-weight d))))))) + + (testing "NaN handling" + (is (thrown-with-msg? 
+ clojure.lang.ExceptionInfo + #"Cannot add NaN to t-digest" + (md/add-point (md/new-digest) Double/NaN))))) + +(defn box-muller + "Generate normal random numbers using Box-Muller transform." + [^double mean ^double std-dev] + (let [u1 (double (rand)) + u2 (double (rand)) + r (Math/sqrt (* -2 (Math/log u1))) + t (* 2 Math/PI u2)] + (+ mean (* std-dev r (Math/cos t))))) + +(deftest normal-distribution-quantile-test + (testing "accuracy with normal distribution" + (let [n 20000 + mean 0.0 + std-dev 1.0 + samples (take n (ziggurat/random-normal-zig + (well/well-rng-1024a))) + digest (reduce md/add-point (md/new-digest 100.0) samples) + expected-q {0.01 (probability/normal-quantile 0.01) + 0.1 (probability/normal-quantile 0.1) + 0.25 (probability/normal-quantile 0.25) + 0.5 mean + 0.75 (probability/normal-quantile 0.75) + 0.9 (probability/normal-quantile 0.9) + 0.99 (probability/normal-quantile 0.99)} + digest (md/compress digest)] + (doseq [[q expected-val] expected-q] + (let [actual (md/quantile digest q) + error (Math/abs (/ (- actual (double expected-val)) std-dev))] + (testing (format "quantile %.2f" q) + (is (< error 0.2) + (format "error %.3f std-dev at q=%.2f (expected=%.2f, got=%.2f)" + error q expected-val actual)))))))) + +(deftest normal-distribution-cdf-test + (testing "accuracy with normal distribution" + (let [n 20000 + mean 0.0 + std-dev 1.0 + samples (take n (ziggurat/random-normal-zig + (well/well-rng-1024a))) + digest (reduce md/add-point (md/new-digest 100.0) samples) + expected-cdf {-2.0 (probability/normal-cdf -2.0) + -1.0 (probability/normal-cdf -1.0) + -0.25 (probability/normal-cdf -0.25) + 0.0 (probability/normal-cdf 0.0) + 0.25 (probability/normal-cdf 0.25) + 1.0 (probability/normal-cdf 1.0) + 2.0 (probability/normal-cdf 2.0)} + digest (md/compress digest)] + (doseq [[z expected-val] expected-cdf] + (let [actual (md/cdf digest z) + error (Math/abs (/ (- actual (double expected-val)) std-dev))] + (testing (format "cdf %.2f" z) + (is (< error 0.01) + (format "error %.3f std-dev at q=%.2f (expected=%.2f, got=%.2f)" + error z expected-val actual)))))))) + +(deftest normal-distribution-sample-states-test + (testing "accuracy with normal distribution" + (let [n 20000 + samples (take n (ziggurat/random-normal-zig + (well/well-rng-1024a))) + digest (reduce md/add-point (md/new-digest 100.0) samples) + digest (md/compress digest)] + ;; NOTE we should calculate bounds for these using the t and chi-squared + ;; distributions. + (is (> 0.05 (Math/abs (md/mean digest)))) + (is (approx= 1.0 (md/variance digest) 0.1))))) + +(deftest basic-operations + (testing "quantile" + (testing "empty digest" + (let [d (md/new-digest)] + (is (= 0.0 (:total-weight d))) + (is (empty? (:centroids d))) + (is (NaN? 
(md/quantile d 0.5))))) + + (testing "single point" + (let [d (-> (md/new-digest) + (md/add-point 1.0) + (md/compress))] + (is (= 1.0 (:total-weight d))) + (is (= 1.0 (md/quantile d 0.0))) + (is (= 1.0 (md/quantile d 0.5))) + (is (= 1.0 (md/quantile d 1.0))))) + + (testing "two points" + (let [d (-> (md/new-digest) + (md/add-point 1.0) + (md/add-point 2.0) + (md/compress))] + (is (= 2.0 (:total-weight d))) + (is (= 1.0 (md/quantile d 0.0))) + (is (= 2.0 (md/quantile d 1.0))))) + + (testing "uniform distribution" + (let [points (range 0 100 10) + d (md/compress (reduce md/add-point (md/new-digest) points)) + _ (println "Centroids:" (:centroids d)) + _ (println "Total weight:" (:total-weight d)) + q50 (md/quantile d 0.5) + _ (println "q50:" q50) + q100 (md/quantile d 1.0) + _ (println "q100:" q100)] + (is (= 0.0 (md/quantile d 0.0))) + (is (= 50.0 q50)) + (is (= 90.0 q100))))) ) + + +(deftest cdf-edge-cases + (testing "invalid inputs" + (let [d (md/new-digest)] + (is (thrown? Exception (md/cdf d Double/NaN))) + (is (thrown? Exception (md/cdf d Double/POSITIVE_INFINITY))) + (is (thrown? Exception (md/cdf d Double/NEGATIVE_INFINITY)))))) + +(deftest cdf-empty-test + (testing "empty digest" + (let [d (md/new-digest)] + (is (NaN? (md/cdf d 0.0))) + (is (NaN? (md/mean d))) + (is (NaN? (md/variance d))) + (is (NaN? (md/minimum d))) + (is (NaN? (md/maximum d)))))) + +(deftest cdf-single-centroid-test + (testing "single centroid" + (let [d (-> (md/new-digest) + (md/add-point 1.0) + (md/compress))] + (is (= 0.0 (md/cdf d 0.0))) + (is (= 1.0 (md/cdf d 2.0))) + (is (= 0.5 (md/cdf d 1.0)))))) + +(deftest cdf-two-point-test + (testing "two singletom points" + (let [d (-> (md/new-digest) + (md/add-point 1.0) + (md/add-point 3.0) + (md/compress))] + (is (= 0.0 (md/cdf d 0.0)) "below") + (is (= 0.5 (md/cdf d 1.0)) "at first") + (is (= 1.0 (md/cdf d 3.0)) "at last") + (is (= 1.0 (md/cdf d 4.0)) "above") + (is (approx= 0.5 (md/cdf d 2.0)) "midpoint")))) + +(deftest cdf-uniform-test + (testing "uniform distribution" + (let [points (range 0 100 10) + d (md/compress (reduce md/add-point (md/new-digest) points))] + (is (= 0.0 (md/cdf d -1.0)) "below") + (is (= 1.0 (md/cdf d 100.0)) "above") + (is (approx= 0.5 (md/cdf d 45.0) 0.1) "midpoint")))) + +;; Generators + +(def gen-finite-double + (gen/double* {:infinite? false :NaN? false})) + + +(defn gen-digest + [& {:keys [buffer-size compression min-samples max-samples] + :or {min-samples 1 + max-samples 100 + compression 100 + buffer-size 128}}] + (gen/fmap (fn [points] + (md/compress + (reduce + md/add-point + (md/new-digest compression buffer-size) + points))) + (gen/vector gen-finite-double min-samples max-samples))) + +;; Property-based tests + +(defspec cdf-bounds-property + 100 + (prop/for-all [d (gen-digest) + x gen-finite-double] + (let [cdf-x (md/cdf d x)] + (and (<= 0.0 cdf-x 1.0) + (= 0.0 (md/cdf d (- Double/MAX_VALUE))) + (= 1.0 (md/cdf d Double/MAX_VALUE)))))) + +(defspec cdf-monotonic-property + 100 + (prop/for-all [d (gen-digest) + ^double x gen-finite-double + ^double dx (gen/double* + {:min 0.0 :max 100.0 + :infinite? false :NaN? false})] + (let [cdf-x (md/cdf d x) + cdf-x+dx (md/cdf d (+ x dx))] + (when-not (>= cdf-x+dx cdf-x) + (prn :x x :dx dx :cdf-x+dx cdf-x+dx :cdf-x cdf-x )) + (>= cdf-x+dx cdf-x)))) + +;; The following two tests do not work because of step-wise values from +;; singleton centroids. 
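+;; A minimal sketch of that step behaviour (mirrors cdf-two-point-test above;
+;; expected values noted inline):
+(comment
+  (let [d (-> (md/new-digest)
+              (md/add-point 1.0)
+              (md/add-point 3.0)
+              (md/compress))]
+    [(md/cdf d (- 1.0 1e-9)) ; expected 0.0, just below the first singleton
+     (md/cdf d 1.0)]))       ; expected 0.5, at the singleton
+;; cdf therefore changes by ~0.5 over an arbitrarily small interval near a
+;; sample, so an approx=-based continuity check cannot hold there.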
+ +#_(defspec cdf-right-continuous-property + 100 + (prop/for-all [d gen-digest + ^double x gen-finite-double] + (let [epsilon 1e-10 + left (md/cdf d x) + right (md/cdf d (+ x epsilon))] + (approx= left right 1e-5)))) + +#_(defspec cdf-quantile-inverse-property + {:num-tests 100 + :max-size 1000} + (prop/for-all [d (gen-digest {:min-samples 200 :max-samples 1000}) + p (gen/double* {:min 0.0 :max 1.0 + :infinite? false :NaN? false})] + (let [q (md/quantile d p) + p' (md/cdf d q)] + (approx= p p' 1e-5)))) + + +(deftest transform-operations + (testing "linear transformation" + (let [digest (-> (md/new-digest) + (md/add-point 1.0) + (md/add-point 2.0) + (md/add-point 3.0) + md/compress) + transformed (md/transform digest (fn [^double x] (+ x 10.0)))] + (is (= [11.0 12.0 13.0] (map :mean (:centroids transformed)))) + (is (= 11.0 (:minimum transformed))) + (is (= 13.0 (:maximum transformed))) + (is (= (:total-weight digest) (:total-weight transformed))) + (is (empty? (:temp-centroids transformed))))) + + (testing "non-linear transformation with sign change" + (let [digest (-> (md/new-digest) + (md/add-point -2.0) + (md/add-point 0.0) + (md/add-point 3.0) + md/compress) + transformed (md/transform digest (fn [^double x] (* x x)))] + (is (= [4.0 0.0 9.0] (map :mean (:centroids transformed)))) + (is (= 4.0 (:minimum transformed)) "Minimum from original min (-2)^2") + (is (= 9.0 (:maximum transformed)) "Maximum from original max 3^2") + (is (= (:total-weight digest) (:total-weight transformed))) + (is (empty? (:temp-centroids transformed))))) + + (testing "empty digest transformation" + (let [digest (md/new-digest) + transformed (md/transform digest (constantly 5.0))] + (is (empty? (:centroids transformed))) + (is (= 5.0 (:minimum transformed))) + (is (= 5.0 (:maximum transformed))) + (is (empty? (:temp-centroids transformed))))) + + (testing "single centroid transformation" + (let [digest (-> (md/new-digest) + (md/add-point 5.0) + md/compress) + transformed (md/transform digest (fn [^double x] (* x 2.0)))] + (is (= [10.0] (map :mean (:centroids transformed)))) + (is (= 10.0 (:minimum transformed))) + (is (= 10.0 (:maximum transformed))) + (is (= (:total-weight digest) (:total-weight transformed))) + (is (empty? (:temp-centroids transformed)))))) diff --git a/bases/criterium/test/criterium/util/t_digest/scale_test.clj b/bases/criterium/test/criterium/util/t_digest/scale_test.clj new file mode 100644 index 0000000..3fd9f5d --- /dev/null +++ b/bases/criterium/test/criterium/util/t_digest/scale_test.clj @@ -0,0 +1,144 @@ +(ns criterium.util.t-digest.scale-test + (:require + [clojure.math :refer [ulp]] + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.test-utils :refer [approx=]] + [criterium.util.t-digest.scale :as scale])) + +(defn gen-double [options] + (gen/double* + (merge {:infinite? false :NaN? 
false} options))) + +(defn inverse-property + [scale] + ;; we lose accuracy near the q limits + (prop/for-all [q (gen-double {:min 1e-12 :max 0.5}) + norm (gen-double {:min 1.0 :max 1000.0})] + (let [k (scale/k scale q norm) + q' (scale/q scale k norm)] + (approx= + q + q' + 1e-4)))) + +(defn k-normalizer-arity-property + [scale] + (prop/for-all [q (gen-double {:min 0.0 :max 0.5}) + comp (gen-double {:min 1.0 :max 1000.0}) + n (gen-double {:min 1.0 :max 1000.0})] + (let [norm (scale/normalizer scale comp n) + k (scale/k scale q norm) + k' (scale/k scale q comp n)] + (approx= k k')))) + +(defn q-normalizer-arity-property + [scale ^double k-min] + ;; we lose accuracy near the q limits + (prop/for-all [k (gen-double {:min k-min :max 100.0}) + comp (gen-double {:min 1.0 :max 1000.0}) + n (gen-double {:min 1.0 :max 1000.0})] + (let [norm (scale/normalizer scale comp n) + q (scale/q scale k norm) + q' (scale/q scale k comp n)] + (approx= q q' 1e-4)))) + +(defn max-size-normalizer-arity-property + [scale] + (prop/for-all [q (gen-double {:min 1e-15 :max (- 1.0 1e-15)}) + comp (gen-double {:min 1.0 :max 1000.0}) + n (gen-double {:min 1.0 :max 1000.0})] + (let [norm (scale/normalizer scale comp n) + s (scale/max-size scale q norm) + s' (scale/max-size scale q comp n)] + (approx= s s')))) + +(defn k-reflection-property + [scale] + (prop/for-all [^double q (gen-double {:min 1e-15 :max (- 0.5 (ulp 0.5))}) + norm (gen-double {:min 1.0 :max 1000.0})] + (let [k (scale/k scale q norm) + k' (- (scale/k scale (- 1.0 q) norm))] + ;; k1 should be symmetric around q=0.5 + (approx= k k' 1e-4 5)))) + +(defn k-monotonic-property + [scale] + (prop/for-all [^double q (gen-double {:min 0.0 :max 0.5}) + norm (gen-double {:min 1.0 :max 1000.0})] + (let [k (scale/k scale q norm) + k' (scale/k scale (+ q 0.1) norm)] + ;; k1 should increase monotonically with q + (> (scale/k scale (+ q 0.1) norm) k)))) + +(defspec k0-inverse-property-test + (inverse-property scale/k0)) + +(defspec k0-k-normalizer-arity-property + (k-normalizer-arity-property scale/k0)) + +(defspec k0-q-normalizer-arity-property + (q-normalizer-arity-property scale/k0 0.0)) + +(defspec k0-max-size-normalizer-arity-property + (max-size-normalizer-arity-property scale/k0)) + +(comment + ;; This doesn't hold for k0 + (defspec k0-k-reflection-property + (k-reflection-property scale/k0))) + +(defspec k0-k-monotonic-property + (k-monotonic-property scale/k0)) + + + +(defspec k1-inverse-property-test + (inverse-property scale/k1)) + +(defspec k1-k-normalizer-arity-property + (k-normalizer-arity-property scale/k1)) + +(defspec k1-q-normalizer-arity-property + (q-normalizer-arity-property scale/k1 0.0)) + +(defspec k1-max-size-normalizer-arity-property + (max-size-normalizer-arity-property scale/k1)) + +(defspec k1-k-reflection-property + (k-reflection-property scale/k1)) + +(defspec k1-k-monotonic-property + (k-monotonic-property scale/k1)) + + +(defspec k2-inverse-property-test + (inverse-property scale/k2)) + +(defspec k2-k-normalizer-arity-property + (k-normalizer-arity-property scale/k2)) + +(defspec k2-q-normalizer-arity-property + (q-normalizer-arity-property scale/k2 (+ 1.0 1e-5))) + +(defspec k2-max-size-normalizer-arity-property + (max-size-normalizer-arity-property scale/k2)) + +(defspec k2-k-reflection-property + (k-reflection-property scale/k2 )) + +(defspec k2-k-monotonic-property + (k-monotonic-property scale/k2)) + + +#_(defspec k1-scale-function-properties-test + (prop/for-all [q (gen/double* {:min 0.0 :max 0.5 :NaN? 
false}) + comp (gen/double* {:min 1.0 :max 1000.0 :NaN? false})] + (let [scale scale/k1 + k1 (scale/k scale q comp)] + (and + ;; k1 should increase monotonically with q + (> (scale/k scale (+ q 0.1) comp) k1) + ;; k1 should be symmetric around q=0.5 + (approx= k1 (- (scale/k scale (- 1.0 q) comp)) 1e-7 5))))) diff --git a/test/criterium/well_test.clj b/bases/criterium/test/criterium/util/well_test.clj similarity index 92% rename from test/criterium/well_test.clj rename to bases/criterium/test/criterium/util/well_test.clj index 0423d9d..7fc13f0 100644 --- a/test/criterium/well_test.clj +++ b/bases/criterium/test/criterium/util/well_test.clj @@ -1,19 +1,51 @@ -(ns criterium.well-test - (:use [criterium.well]) - (:use clojure.test)) +(ns criterium.util.well-test + (:require + [clojure.test :refer [deftest is]] + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.test-utils :refer [gen-bounded test-max-error]] + [criterium.util.stats :as stats] + [criterium.util.well :as well])) (deftest bit-shift-right-ns-test - (is (= 4 (bit-shift-right-ns 8 1))) - (is (= Integer/MAX_VALUE (bit-shift-right-ns int-max 1)))) + (is (= 4 (well/bit-shift-right-ns 8 1))) + (is (= Integer/MAX_VALUE (well/bit-shift-right-ns (well/int-max) 1)))) (def well-1024a-res0 [0.7555646521504968 0.19296938693150878 0.5025935324374586 0.007121119415387511 0.3206049546133727 0.6226997578050941 0.19532041205093265 0.6845046859234571 0.8087562681175768 0.5690997282508761 0.5894886783789843 0.5046155452728271 0.9751246876548976 0.6914222184568644 0.3825726895593107 0.30811961530707777 0.3162773186340928 0.25978297321125865 0.1882068100385368 0.4053002488799393 0.9043356603942811 0.32395724789239466 0.6211904066149145 0.9419454524759203 0.9449770536739379 0.9217105777934194 0.4034570436924696 0.9789473409764469 0.6736162463203073 0.5853548941668123 0.5453409468755126 0.22516538621857762 0.6669957956764847 0.7062211805023253 0.34354016627185047 0.9300084430724382 0.13674178812652826 0.675577673362568 0.9510733592323959 0.2949520070105791 0.7337068577762693 0.07864327798597515 0.5205190151464194 0.38997069909237325 0.9444665557239205 0.45795420347712934 0.2416362497024238 0.6867330060340464 0.5321219067554921 0.8443026845343411 0.1273612636141479 0.4440925878006965 0.9705026806332171 0.26698391255922616 0.3004519783426076 0.16834274376742542 0.685988245299086 0.2668265213724226 0.9757483757566661 0.9247177159413695 0.7674569354858249 0.013343769125640392 0.008299330482259393 0.6560487598180771 0.816593048395589 0.7043115950655192 0.43986908346414566 0.7592308623716235 0.040754047920927405 0.27999776555225253 0.8341864652466029 0.5772025266196579 0.8917758027091622 0.5601078029721975 0.9991267474833876 0.11920967139303684 0.7340277135372162 0.7873277170583606 0.22335971146821976 0.7469482556916773 0.22395056439563632 0.4285199379082769 0.006469369865953922 0.5851125624030828 0.4061334142461419 0.3672179854474962 0.219418880995363 0.6017608169931918 0.1509036768693477 0.7893868112005293 0.31288449303247035 0.2645777799189091 0.3193615647032857 0.22134555014781654 0.4823740136343986 0.036793411476537585 0.7684427464846522 0.46402973611839116 0.43314514588564634 0.43667770689353347 0.5850964242126793 0.38665328873321414 0.6652298101689667 0.6201674710027874 0.25447412277571857 0.3283572169020772 0.9104763225186616 0.25071978801861405 0.3460737771820277 0.1901100839022547 0.1679911690298468 0.42636885214596987 0.15886777569539845 
0.5018260458018631 0.8311577739659697 0.39929200266487896 0.2771829552948475 0.10446352534927428 0.24270599242299795 0.36708760308101773 0.6491420033853501 0.07084565586410463 0.6300365508068353 0.3839554046280682 0.6575432498939335 0.7175556654110551 0.7395096861291677 0.19398689735680819 0.6372492231894284 0.158453066367656 0.624662283109501 0.1218663165345788 0.025197059381753206 0.060373732121661305 0.1414467424619943 0.8914599637500942 0.8357561882585287 0.2555791118647903 0.7387337314430624 0.3943949183449149 0.5570761850103736 0.717579206218943 0.03640151396393776 0.782992908032611 0.1442157339770347 0.11787344282492995 0.660314847715199 0.20912273740395904 0.8832072992809117 0.6436884559225291 0.7820864149834961 0.6609280968550593 0.5258305622264743 0.7788030290976167 0.4238539063371718 0.4352403224911541 0.5241800183430314 0.11417777999304235 0.7686906508170068 0.14142680214717984 0.6073010405525565 0.44714760640636086 0.5588981078471988 0.11603280296549201 0.3383257696405053 0.45204067043960094 0.8788658352568746 0.1927342745475471 0.30519954510964453 0.3482908862642944 0.4364028258714825 0.13022807775996625 0.6224698822479695 0.5267538735643029 0.2802534019574523 0.835684244055301 0.9912651863414794 0.2809236270841211 0.7988971949089319 0.6870784577913582 0.8867741664871573 0.907712601358071 0.6892909298185259 0.6843732518609613 0.9055354928132147 0.8161950923968107 0.7711446485482156 0.1942125738132745 0.6105280555784702 0.902421465376392 0.7958630793727934 0.46677285991609097 0.9925589482299984 0.49840362812392414 0.15570157603360713 0.5051688989624381 0.7655096356756985 0.5824041932355613 0.8782601107377559 0.570579610299319 0.0858758797403425 0.4714239500463009 0.6287357001565397 0.012176015647128224 0.07114821881987154 0.582413146737963 0.027754488168284297 0.6287662922404706 0.17712684581056237 0.3923184243030846 0.46861305576749146 0.3010474380571395 0.5517059159465134 0.5211608030367643 0.13069019163958728 0.46851527132093906 0.4537353322375566 0.9212657301686704 0.8410076696891338 0.36046281177550554 0.8895449675619602 0.31669044960290194 0.9898941377177835 0.08242866210639477 0.6607744314242154 0.003986309748142958 0.7287824084050953 0.4094456802122295 0.5924238685984164 0.8097017053514719 0.3003195868805051 0.17480628797784448 0.9716594759374857 0.7625474957749248 0.9931910787709057 0.5338102197274566 0.78829260263592 0.048111873446032405 0.05054855695925653 0.4288309742696583 0.18000489030964673 0.8022729340009391 0.13487983914092183 0.6594261156860739 0.1937381171155721 0.07948682480491698 0.06719898898154497 0.7758951685391366 0.12151186214759946 0.3549578874371946 0.10694335028529167 0.7249686119612306 0.034047398483380675 0.7973769002128392 0.6693953268695623 0.056540791410952806 0.9503808913286775 0.2247266797348857 0.21758119203150272 0.16954494989477098 0.7755528725683689 0.47671999875456095 0.9087867012713104 0.6066615867894143 0.9130187949631363 0.7757458300329745 0.6787639046087861 0.7722544269636273 0.8862191238440573 0.6488839488010854 0.3661321687977761 0.25797038129530847 0.3972494751214981 0.5784893878735602 0.7151897584553808 0.03715417021885514 0.20087301614694297 0.11141664604656398 0.9212516350671649 0.45895271841436625 0.6172081530094147 0.8468745823483914 0.26728207105770707 0.6675534574314952 0.215640711132437 0.5401744309347123 0.5056740387808532 0.8632741565816104 0.8166575853247195 0.8680351634975523 0.5216173890512437 0.061797033064067364 0.9477440975606441 0.3782971021719277 0.36070644576102495 0.9181073615327477 0.22001777170225978 
0.9402690029237419 0.05984971462748945 0.06714744144119322 0.3248397735878825 0.12915337784215808 0.7214453718625009 0.3680953304283321 0.5207683225162327 0.3164702767971903 0.46592912706546485 0.01706564868800342 0.1263738654088229 0.6285697533749044 0.6236315430141985 0.0615514216478914 0.7780995036009699 0.20336504722945392 0.8962475180160254 0.07119414140470326 0.06718915048986673 0.20575407543219626 0.5864740144461393 0.43975308630615473 0.9824135808739811 0.05022888444364071 0.26168027333915234 0.10603460925631225 0.9507736493833363 0.3369394172914326 0.5487543460913002 0.5964405362028629 0.039727029856294394 0.5600688389968127 0.27308964752592146 0.8056181429419667 0.5037669453304261 0.15922586829401553 0.9295837441459298 0.344323945697397 0.07399148819968104 0.3783202322665602 0.4550651325844228 0.5722904775757343 0.94433763041161 0.5447231694124639 0.7208280144259334 0.7500562102068216 0.9623808099422604 0.4074538985732943 0.2690850624348968 0.5638563681859523 0.10262796352617443 0.2477680144365877 0.14297585329040885 0.3442035827320069 0.7458227581810206 0.19771007332019508 0.3566648524720222 0.6700491360388696 0.513715195003897 0.9276824435219169 0.33881583041511476 0.688413605093956 0.654433774529025 0.2213877053000033 0.3943435319233686 0.7640800953377038 0.6358868142124265 0.581883414182812 0.14347550459206104 0.978092819917947 0.444108271272853 0.9932567009236664 0.8562233315315098 0.8532964903861284 0.05465809814631939 0.7031131486874074 0.5079021505080163 0.285394569626078 0.2292247221339494 0.3076238122303039 0.2446007994003594 0.9391862025950104 0.32274545822292566 0.0688715095166117 0.9091717111878097 0.9221273160073906 0.8778240298852324 0.6306535447947681 0.7219051038846374 0.9272768190130591 0.2979870829731226 0.6234900464769453 0.25014320900663733 0.06917555420659482 0.7882044278085232 0.3741772382054478 0.03388847294263542 0.04724784241989255 0.28253703843802214 0.863413778366521 0.44384941179305315 0.5789484502747655 0.16299469489604235 0.18491058400832117 0.9251435853075236 0.9041201474610716 0.38625800609588623 0.08549584378488362 0.7474953709170222 0.24932730663567781 0.35439529339782894 0.7538842514622957 0.5971153748687357 0.9230850720778108 0.3751015772577375 0.6935672552790493 0.5860141841694713 0.2577421758323908 0.9097921634092927 0.7485630193259567 0.5336604935582727 0.8526075799018145 0.3338584292214364 0.26755846408195794 0.16452790540643036 0.12203746545128524 0.28809076244942844 0.43539610342122614 0.5351749199908227 0.7366595065686852 0.5074216385837644 0.10066069127060473 0.44401570898480713 0.9631719945464283 0.5060960829723626 0.7755969970021397 0.6946522817015648 0.622231722343713 0.09502863441593945 0.8774998495355248 0.45593873760662973 0.24934913427568972 0.8637605588883162 0.36392279015854 0.29445424815639853 0.6000052525196224 0.754869501804933 0.5723027035128325 0.6703692160081118 0.25734534789808095 0.8752852398902178 0.8625927641987801 0.5714214837644249 0.8510513559449464 0.38423393853008747 0.6928380699828267 0.9245617059059441 0.4011187197174877 0.6768172681331635 0.3410445817280561 0.48728543263860047 0.8544576889835298 0.6889376011677086 0.3669116934761405 0.9808830141555518 0.16396227315999568 0.7485008628573269 0.25150786037556827 0.8567262347787619 0.9943494431208819 0.2318595563992858 0.15931490086950362 0.7880970344413072 0.4004311515018344 0.4454111475497484 0.8049112134613097 0.659513988532126 0.6869270945899189 0.8730051580350846 0.5473549445159733 0.267884774832055 0.31441421480849385 0.7702884818427265 0.6727925473824143 
0.6945093285758048 0.03287728619761765 0.7582073586527258 0.9173706469591707 0.826526662800461 0.0748087705578655 0.052384020294994116 0.7692182012833655 0.47125741257332265 0.3416770545300096 0.4951580313500017 0.29688000050373375 0.8986193402670324 0.9262301037088037 0.16495887795463204 0.7712404427584261 0.7009469424374402 0.6076272109057754 0.10788102052174509 0.5975008320529014 0.5810526120476425 0.9668923146091402 0.9682649879250675 0.4528131391853094 0.33904013922438025 0.9547711247578263 0.10916815837845206 0.27618894656188786 0.7535752141848207 0.6686208914034069 0.019345402251929045 0.5708079785108566 0.7135509124491364 0.1342482678592205 0.4500692873261869 0.5260242838412523 0.27724604145623744 0.047203158028423786 0.3471950509119779 0.5208230316638947 0.5562563829589635 0.21969088283367455 0.4926070854999125 0.11484426818788052 0.6475049296859652 0.27259507519192994 0.4088108635041863 0.6945610297843814 0.5331952183041722 0.02602390549145639 0.05668114894069731 0.8983906484209001 0.7980681171175092 0.9842495447956026 0.37138081807643175 0.44817613740451634 0.14653430948965251 0.328293435042724 0.5909028775058687 0.051852585980668664 0.5309626047965139 0.15616464475169778 0.247461658436805 0.8189321781974286 0.37547166971489787 0.0024520547594875097 0.9527097463142127 0.8362732368987054 0.10794873698614538 0.8914683717302978 0.21713400236330926 0.21811435231938958 0.9813511252868921 0.6853204520884901 0.9280226475093514 0.17580619035288692 0.7518611368723214 0.6390619252342731 0.7494605074170977 9.550023823976517E-4 0.1626889507751912 0.9434123083483428 0.08510001236572862 0.9951398021075875 0.3987734855618328 0.5631283049006015 0.8100046294275671 0.1979206409305334 0.4982678396627307 0.16230163467116654 0.2724143280647695 0.9673387475777417 0.1859030667692423 0.2473264851141721 0.025224779034033418 0.13980171456933022 0.8367907286155969 0.04778134380467236 0.0019024272914975882 0.44797221361659467 0.5483753134030849 0.4879827566910535 0.9465762150939554 0.6253756200894713 0.40133296651765704 0.3452297451440245 0.7826143545098603 0.31615317706018686 0.6015399689786136 0.7171431763563305 0.7239149282686412 0.9486609974410385 0.4918237563688308 0.2412079768255353 0.7871187140699476 0.16808019042946398 0.20539209037087858 0.1654653123114258 0.24745060317218304 0.9906764447223395 0.6780416378751397 0.35415648575872183 0.6629006329458207 0.9116796688176692 0.4559391075745225 0.11041606334038079 0.2766846097074449 0.5471633428242058 0.1907645643223077 0.33126555592752993 0.8320216732099652 0.1114092767238617 0.48186866217292845 0.8638578057289124 0.6355665870942175 0.5438802263233811 0.36040451540611684 0.6713373749516904 0.3964302234817296 0.16679690382443368 0.499270795378834 0.8637193085160106 0.061108154244720936 0.4558856321964413 0.3828166981693357 0.636303436011076 0.5016614652704448 0.05376837542280555 0.8274522635620087 0.19399723154492676 0.6016310227569193 0.4179402014706284 0.5041333399713039 0.5594518741127104 0.5210877589415759 0.04799440945498645 0.2569379194173962 0.5121237640269101 0.33526887535117567 0.6449274495244026 0.6687424699775875 0.5142297735437751 0.25666187331080437 0.9993277955800295 0.7522748927585781 0.9112364307511598 0.09822610346600413 0.8092868991661817 0.11013635550625622 0.8229314149357378 0.28508140332996845 0.9270469557959586 0.07332300441339612 0.7638805550523102 0.00898260809481144 0.6322982942219824 0.3097354369238019 0.3096516279038042 0.8748664716258645 0.02619903231970966 0.7353408709168434 0.1530386672820896 0.40527300303801894 
0.8478010513354093 0.24587382352910936 0.3452085554599762 0.24724601092748344 0.43039365601725876 0.791206969646737 0.6037243802566081 0.1192402271553874 0.4805405449587852 0.5577954079490155 0.4367192480713129 0.6837504550348967 0.10335447220131755 0.1435617811512202 0.6515296443831176 0.8216401739045978 0.0723428693599999 0.02252646442502737 0.158960031112656 0.31328473635949194 0.21097608003765345 0.3076917855069041 0.9788284015376121 0.7338457198347896 0.42461182619445026 0.12589112808927894 0.42618220881558955 0.5958739381749183 0.8884397419169545 0.299964586738497 0.03730473364703357 0.2662915491964668 0.4868591639678925 0.2552387216128409 0.988661585142836 0.6299099235329777 0.5607837797142565 0.8240154627710581 0.6616356542799622 0.7100805609952658 0.6646123446989805 0.40266008023172617 0.7160254665650427 0.07690791133791208 0.2835704102180898 0.14558348152786493 0.7703524536918849 0.03014729800634086 0.06727877981029451 0.37680157250724733 0.2625046060420573 0.006136440904811025 0.25914360024034977 0.8866662366781384 0.8578437478281558 0.27957644197158515 0.8081350615248084 0.40207430301234126 0.35448492178693414 0.9070351438131183 0.5920429355464876 0.8681049512233585 0.8531742042396218 0.8708263989537954 0.9781084065325558 0.14752329187467694 0.5488005294464529 0.6457502499688417 0.8142608248163015 0.9450360343325883 0.15391431376338005 0.577356917783618 0.10715358797460794 0.18042230582796037 0.4038018074352294 0.751609347993508 0.12286805780604482 0.19896953273564577 0.7437035676557571 0.22499978123232722 0.9863044694066048 0.4531559441238642 0.2740379043389112 0.46292801294475794 0.9837289205752313 0.9174044677056372 0.1161456189583987 0.12932480755262077 0.6283382405526936 0.785972599638626 0.893002551747486 0.6683821144979447 0.48597938823513687 0.7346535462420434 0.4025907658506185 0.16092061367817223 0.6518170442432165 0.12368792877532542 0.08869582670740783 0.07134998822584748 0.14498513145372272 0.9227717637550086 0.05715464102104306 0.295690682483837 0.19093898101709783 0.7125349359121174 0.7769378898665309 0.7961046474520117 0.9322317561600357 0.8330148810055107 0.17268100753426552 0.12588501977734268 0.9317147410474718 0.9756659455597401 0.21344331372529268 0.015632647089660168 0.9431237289682031 0.859583338489756 0.7412657078821212 0.7168328638654202 0.2559301645960659 0.09703678172081709 0.6400787271559238 0.3633797427173704 0.30000986135564744 0.2878304913174361 0.9400027238298208 0.3799480537418276 0.8843172821216285 0.6560051045380533 0.6653353362344205 0.4591320671606809 0.032394493697211146 0.13281420641578734 0.10863196291029453 0.8310163735877723 0.9694646673742682 0.016212739748880267 0.944929052144289 0.016071602934971452 0.6080390429124236 0.4106945467647165 0.5548939574509859 0.7357456600293517 0.021995610324665904 0.8687994643114507 0.4265273907221854 0.20938792382366955 0.8542211556341499 0.8790589557029307 0.506800695322454 0.2581303343176842 0.2501546663697809 0.49468795978464186 0.8987339914310724 0.6493606683798134 0.03351360489614308 0.5585470309015363 0.811490562511608 0.9828383987769485 0.14057071437127888 0.001633527223020792 0.11427318421192467 0.15512081235647202 0.06378560676239431 0.568412387277931 0.2654315805993974 0.7554840522352606 0.7216246470343322 0.828235893510282 0.3895577716175467 0.12386663351207972 0.33415288128890097 0.17428646329790354 0.8764232322573662 0.994880493497476 0.4323724848218262 0.3545102351345122 0.3161176657304168 0.0807878312189132 0.27392753469757736 0.0563712427392602 0.5488673080690205 0.26941023813560605 
0.7795852867420763 0.699305682675913 0.9345453025307506 0.31135368742980063 0.31337152095511556 0.668668475234881 0.9941427132580429 0.8030097442679107 0.9986464083194733 0.8405935321934521 0.5569225188810378 0.7799115644302219 0.1413323301821947 0.47963172383606434 0.5704309763386846 0.4912073283921927 0.9357429542578757 0.8018140424974263 0.07972390693612397 0.6038214708678424 0.8669975902885199 0.7128381424117833 0.14792149723507464 0.5674334727227688 0.2871553448494524 0.5546456398442388 0.4027501919772476 0.8121269992552698 0.6845149411819875 0.828492168802768 0.5795849554706365 0.4528217217884958 0.2855009457562119 0.6746565706562251 0.1671252534724772 0.6259617554023862 0.7672309812624007 0.8476617166306823 0.15883506392128766 0.3732784439343959 0.4302535164169967 0.7022808252368122 0.8467318608891219 0.09239248465746641 0.21892477641813457 0.9601358626969159 0.42663405020721257 0.07931019784882665 0.22761585842818022 0.746306738583371 0.020160013809800148 0.6920021711848676 0.6762252035550773 0.37280794652178884 0.6402611255180091 0.9889494553208351 0.23268099315464497 0.5805389611050487 0.6943439054302871 0.26118701766245067 0.20282425242476165 0.9266050483565778 0.8333429330959916 0.10438607912510633 0.7062624464742839 0.5690834317356348 0.5954476820770651 0.1212741497438401 0.23927501193247736 0.1877824713010341 0.2614723739679903 0.6835239802021533 0.1367280858103186 0.2503882416058332 0.3407706816215068 0.13991061761043966 0.4394513969309628 0.37299842783249915 0.6881083531770855 0.6617652128916234 0.3356163641437888 0.633591438177973 0.2270530245732516 0.08564128843136132 0.08918673382140696 0.38108484307304025 0.2879103682935238 0.03743588551878929 0.16190698859281838 0.5442090569995344 0.9782140927854925 0.7281343133654445 0.8959968844428658 0.9710898660123348 0.9659751122817397 0.8550994270481169 0.7798358544241637 0.21895540435798466 0.47062039049342275 0.9463196953292936 0.16630126326344907 0.17202942166477442 0.060727793257683516 0.9646205923054367 0.7798067824915051 0.40155754797160625 0.6616292279213667 0.278131868224591 0.06995663419365883 0.9850121419876814 0.6142154338303953 0.30562002025544643 0.38849155488424003 0.9468732273671776 0.6018442497588694 0.3051181042101234 0.30235120351426303 0.5040468256920576 0.14324497105553746 0.05515951314009726 0.9916498868260533 0.7399833251256496 0.9040463992860168 0.6489426465705037 0.6927148157265037 0.5836423919536173 0.010520953685045242 0.1673461825121194 0.9459120449610054 0.7859659229870886 0.3802800406701863 0.3338597200345248 0.38362456066533923 0.2603508613537997 0.44760442478582263 0.8442732116673142 0.7357446802780032 0.325843742582947 0.1629238303285092 0.5950239540543407 0.7880567463580519 0.41093407361768186 0.45484352204948664 0.03207776602357626 0.794857315486297 0.24861530447378755 0.03428574767895043 0.8560385114978999 0.883311505895108 0.3438062600325793 0.46712828683666885]) - (deftest well-1024a-test (is (= (take 1000 - (criterium.well/well-rng-1024a + (well/well-rng-1024a (long-array 32 [512 419 150 58 784 546 1012 939 419 520 825 296 222 87 357 198 534 728 325 427 300 1021 856 208 318 272 688 760 229 14 135 610]) 0)) well-1024a-res0))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec add-mod-32-test-property 100 + (prop/for-all + [a gen/small-integer + b gen/small-integer] + (is (<= 0 (well/add-mod-32 (long a) (long b)) 31)))) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec well-1024a-test-property 100 + (prop/for-all + [random-seed (gen/large-integer* {:min 
0x111111}) + well-index (gen-bounded 0 31)] + (let [random-source (java.util.Random. random-seed) + ;; for well-state, we want random values that are not bounded by test-check's + ;; size + well-state (->> #(.nextInt random-source) + repeatedly + (filter (complement zero?)) + (take 32) + (map #(Math/abs ^long %)) + long-array) + values (->> (well/well-rng-1024a well-state well-index) + (take 10000) + vec)] + (test-max-error (stats/mean values) 0.5 2e-2) + (test-max-error (stats/variance values) (/ 1.0 12) 1e-2)))) diff --git a/bases/criterium/test/criterium/util/ziggurat_test.clj b/bases/criterium/test/criterium/util/ziggurat_test.clj new file mode 100644 index 0000000..1a702a6 --- /dev/null +++ b/bases/criterium/test/criterium/util/ziggurat_test.clj @@ -0,0 +1,30 @@ +(ns criterium.util.ziggurat-test + (:require + [clojure.test :refer [is]] + [clojure.test.check.clojure-test :refer [defspec]] + [clojure.test.check.generators :as gen] + [clojure.test.check.properties :as prop] + [criterium.test-utils :refer [abs-error]] + [criterium.util.stats :as stats] + [criterium.util.ziggurat :as ziggurat])) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(defspec random-normal-zig-test-property 10 + (prop/for-all + [random-seed gen/small-integer] + (let [random-source (java.util.Random. random-seed) + values (->> #(.nextDouble random-source) + repeatedly + ziggurat/random-normal-zig + (take 10000) + vec) + mean (stats/mean values) + variance (stats/variance values) + mean-error (abs-error mean 0.0) + variance-error (abs-error variance 1.0) + mean-tol 1e-1 + variance-tol 15e-1] + (is (< mean-error mean-tol)) + (is (< variance-error variance-tol)) + (and (< mean-error mean-tol) + (< variance-error variance-tol))))) diff --git a/bases/criterium/test/criterium/viewer/portal_test.clj b/bases/criterium/test/criterium/viewer/portal_test.clj new file mode 100644 index 0000000..349d481 --- /dev/null +++ b/bases/criterium/test/criterium/viewer/portal_test.clj @@ -0,0 +1,189 @@ +(ns criterium.viewer.portal-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.analyse :as analyse] + [criterium.test-data :as test-data] + [criterium.view :as view] + [criterium.viewer.portal :as portal]) + (:import + [java.util Queue])) + +(set! *unchecked-math* false) + +(defmacro with-tap-out [& body] + `(let [v# (volatile! []) + f# (fn [x#] + (when-not (= ::portal/_ x#) + (vswap! v# conj x#)))] + (try + (add-tap f#) + ~@body + (loop [] + (when-not (.isEmpty ^Queue @#'clojure.core/tapq) + (recur))) + (loop [] + (when (empty? 
@v#) + (recur))) + (portal/flush) + @v# + (finally + (remove-tap f#))))) + +(deftest portal-samples-test + (testing "portal-samples" + (testing "charts the sample data" + (let [[title chart] (with-tap-out + (view/samples* + :portal + {} + (:data (test-data/samples-with-2-values-map))))] + (is (= [{:elapsed-time 1.0, :index 0, :outlier ""} + {:elapsed-time 1.0, :index 1, :outlier ""}] + (-> chart :vconcat first :layer first :data :values))) + (is (= [:b "Samples"] title)))) + (testing "charts with transformed sample data" + (let [[title chart] (with-tap-out + (view/samples* + :portal + {} + (:data + (test-data/samples-with-transformed-values-map))))] + (is (= [{:elapsed-time 1.0, :index 0, :outlier ""} + {:elapsed-time 2.0, :index 1, :outlier ""} + {:elapsed-time 4.0, :index 2, :outlier ""}] + (-> chart :vconcat first :layer first :data :values))) + (is (= [:b "Samples"] title)))) + (testing "charts the sample data" + (let [bench-map (:data (test-data/samples-with-outliers-values-map)) + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + outliers (analyse/outliers) + stats (analyse/stats) + view (view/samples) + [title chart] (with-tap-out + (->> bench-map + quantiles + outliers + stats + (view :portal)))] + (is (= [{:elapsed-time 9.0, :index 0, :outlier ""} + {:elapsed-time 10.0, :index 1, :outlier ""} + {:elapsed-time 9.0, :index 2, :outlier ""} + {:elapsed-time 10.0, :index 3, :outlier ""} + {:elapsed-time 9.0, :index 4, :outlier ""} + {:elapsed-time 10.0, :index 5, :outlier ""} + {:elapsed-time 10000.0, :index 6, :outlier :high-severe}] + (-> chart :vconcat first :layer first :data :values))) + (is (= [:b "Samples"] title)))))) + +(deftest portal-sample-percentiles-test + (testing "portal-percentiles" + (testing "charts the sample data" + (let [[title chart] (with-tap-out + (view/sample-percentiles* + :portal + {} + (:data (test-data/samples-with-2-values-map))))] + (is (= [{:elapsed-time 1.0, :x 0.0, :p 0} + {:elapsed-time 1.0, :x 1.0, :p 100.0}] + (-> chart :vconcat first :layer first :data :values))) + (is (= [:b "Percentiles"] title)))))) + +(deftest portal-histogram-test + (testing "portal-histogram" + (testing "charts the sample data" + (let [data-map + (:data (test-data/samples-with-2-values-map)) + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + outliers (analyse/outliers) + stats (analyse/stats) + view-histogrem (view/histogram) + [title chart] (with-tap-out + (->> data-map + quantiles + outliers + stats + (view-histogrem :portal)))] + (is (= [{:elapsed-time 1.0, :index 0, :outlier ""} + {:elapsed-time 1.0, :index 1, :outlier ""}] + (-> chart :vconcat first :layer first :data :values))) + (is (= [:b "Histogram"] title)))))) + +(deftest portal-stats-test + (testing "print-stats" + (testing "prints via output-view" + (is (= [[:b "Summary stats"] + [{:_metric "Elapsed Time ns", + :mean 100.0 + :min-val 89.0 + :mean-minus-3sigma 88.0 + :mean-plus-3sigma 112.0 + :max-val 114.0}]] + (with-tap-out + (view/stats* + :portal + {} + (:data (test-data/bench-stats-map)))))) + + (is (= [[:b "Summary stats"] + [{:_metric "Elapsed Time ns", + :mean 1.00, + :min-val 1.00, + :mean-minus-3sigma 1.00, + :mean-plus-3sigma 1.00, + :max-val 1.00}]] + (let [data-map + (:data (test-data/samples-with-2-values-map)) + stats (analyse/stats) + view-stats (view/stats)] + (with-tap-out + (->> data-map + stats + (view-stats :portal))))))))) + +(deftest portal-outlier-count-test + (testing "print-outlier-count" + (testing "prints via view" + (is + (= [[:b "Outliers"] + [{:low-severe 0, + 
:low-mild 2, + :high-mild 3, + :high-severe 0, + :_metric "Elapsed Time"}]] + (with-tap-out + ((view/outlier-counts) + :portal + (:data (test-data/outlier-count-map))))))))) + +(deftest portal-outlier-significance-test + (testing "print-outlier-significance" + (testing "prints via view" + (is (= [[:b "Outlier Significance"] + [{:effect :moderate :significance 0.25}]] + (with-tap-out + ((view/outlier-significance) + :portal + (:data (test-data/outlier-significance-map))))))))) + +(deftest portal-event-stats-test + (testing "print-event-stats" + (testing "prints via report" + (is (= [[:b "Event stats"] + [{:metric "ClassLoader", + :sample-count "1.0", + :loaded-count "1.0", + :unloaded-count "1.0"} + {:metric "JIT compilation", + :sample-count "1.0", + :time-ms "3.00 ms"} + {:metric "Garbage Collector", + :total-sample-count "1.0", + :total-count "2.0", + :total-time-ms "1.00 ms"}]] + (let [data-map (:data (test-data/samples-for-event-stats-map)) + event-stats (analyse/event-stats) + view (view/event-stats)] + (with-tap-out + (->> data-map + event-stats + (view :portal))))))))) diff --git a/bases/criterium/test/criterium/viewer/pprint_test.clj b/bases/criterium/test/criterium/viewer/pprint_test.clj new file mode 100644 index 0000000..2157938 --- /dev/null +++ b/bases/criterium/test/criterium/viewer/pprint_test.clj @@ -0,0 +1,95 @@ +(ns criterium.viewer.pprint-test + (:require + [clojure.test :refer [deftest is testing]] + [criterium.analyse :as analyse] + [criterium.collect-plan :as collect-plan] + [criterium.collector.metrics :as metrics] + [criterium.metric :as metric] + [criterium.test-utils :refer [trimmed-lines]] + [criterium.view :as view] + [criterium.viewer.pprint] + [criterium.test-data :as test-data])) + +(def expected-stats-1 + ["" + "| :_metric | :mean-minus-3sigma | :mean | :mean-plus-3sigma | :min-val | :max-val |" + "|-----------------+--------------------+-------+-------------------+----------+----------|" + "| Elapsed Time ns | 88.0 | 100.0 | 112.0 | 89.0 | 114.0 |"]) + +(def expected-stats-2 + ["" + "| :_metric | :mean-minus-3sigma | :mean | :mean-plus-3sigma | :min-val | :max-val |" + "|-----------------+--------------------+-------+-------------------+----------+----------|" + "| Elapsed Time ns | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |"]) + +(deftest pprint-stats-test + (testing "print-stats" + (testing "prints via output-view" + (is (= expected-stats-1 + (trimmed-lines + (with-out-str + (view/stats* + :pprint + {} + (:data (test-data/bench-stats-map))))))) + + (is (= expected-stats-2 + (let [data-map (:data (test-data/samples-with-2-values-map)) + stats (analyse/stats) + view-stats (view/stats)] + (trimmed-lines + (with-out-str + (->> data-map + stats + (view-stats :pprint)))))))))) + +(def expected-counts + ["" + "| :_metric | :low-severe | :low-mild | :high-mild | :high-severe |" + "|--------------+-------------+-----------+------------+--------------|" + "| Elapsed Time | 0 | 2 | 3 | 0 |"]) + +(deftest print-outlier-count-test + (testing "print-outlier-count" + (testing "prints via view" + (is + (= expected-counts + (trimmed-lines + (with-out-str + ((view/outlier-counts) + :pprint + (:data (test-data/outlier-count-map)))))))))) + +(deftest print-outlier-significance-test + (testing "print-outlier-significance" + (testing "prints via view" + (is (= ["" + "| :effect | :significance |" + "|-----------+---------------|" + "| :moderate | 0.25 |"] + (trimmed-lines + (with-out-str + ((view/outlier-significance) + :pprint + (:data (test-data/outlier-significance-map)))))))))) + +(def 
^:private expected-event-stats + ["" + "| :metric | :sample-count | :loaded-count | :unloaded-count | :time-ms | :total-sample-count | :total-count | :total-time-ms |" + "|-------------------+---------------+---------------+-----------------+----------+---------------------+--------------+----------------|" + "| ClassLoader | 1.0 | 1.0 | 1.0 | | | | |" + "| JIT compilation | 1.0 | | | 3.00 ms | | | |" + "| Garbage Collector | | | | | 1.0 | 2.0 | 1.00 ms |"]) + +(deftest pprint-event-stats-test + (testing "print-event-stats" + (testing "prints via report" + (is (= expected-event-stats + (let [data-map (:data (test-data/samples-for-event-stats-map)) + event-stats (analyse/event-stats) + view (view/event-stats)] + (trimmed-lines + (with-out-str + (->> data-map + event-stats + (view :pprint)))))))))) diff --git a/bases/criterium/test/criterium/viewer/print_test.clj b/bases/criterium/test/criterium/viewer/print_test.clj new file mode 100644 index 0000000..dd1f4cd --- /dev/null +++ b/bases/criterium/test/criterium/viewer/print_test.clj @@ -0,0 +1,306 @@ +(ns criterium.viewer.print-test + (:require + [clojure.string :as str] + [clojure.test :refer [deftest is testing]] + [criterium.analyse :as analyse] + [criterium.collector.metrics :as metrics] + [criterium.test-utils :refer [trimmed-lines]] + [criterium.util.bootstrap :as bootstrap] + [criterium.view :as view] + [criterium.viewer.print :as print] + [criterium.collect-plan.config :as collect-plan-config] + [criterium.collect-plan :as collect-plan] + [criterium.test-data :as test-data])) + +(deftest print-stat-test + (testing "print-stat" + (is (= "Elapsed Time: 100 ns 3σ [88.0 112] min 89.0" + (str/trim + (with-out-str + (print/print-stat + {:label "Elapsed Time" + :scale 1e-9 + :dimension :time} + {:mean 100.0 + :variance 16.0 + :mean-plus-3sigma 112.0 + :mean-minus-3sigma 88.0 + :min-val 89.0} + [collect-plan/identity-transforms]))))))) + +(defn identity-transform [samples] + (with-meta samples {:transform {:sample-> identity :->sample identity}})) + +(deftest print-stats-test + (testing "print-stats" + (testing "prints via output-view" + (is (= ["Elapsed Time: 100 ns 3σ [88.0 112] min 89.0"] + (trimmed-lines + (with-out-str + (view/stats* + :print + {} + (:data (test-data/bench-stats-map))))))) + + (is (= ["Elapsed Time: 1.00 ns 3σ [1.00 1.00] min 1.00"] + (let [data-map (:data (test-data/samples-with-2-values-map)) + stats (analyse/stats) + view-stats (view/stats)] + (trimmed-lines + (with-out-str + (->> data-map + stats + (view-stats :print))))))))) + (testing "print-stats" + (testing "prints via output-view" + (is (= ["Elapsed Time: 100 ns 3σ [88.0 112] min 89.0"] + (trimmed-lines + (with-out-str + (view/stats* + :print + {} + (:data (test-data/bench-stats-map))))))) + + (is (= ["Elapsed Time: 1.00 ns 3σ [1.00 1.00] min 1.00"] + (let [data-map (:data (test-data/samples-with-2-values-map)) + stats (analyse/stats) + view-stats (view/stats)] + (trimmed-lines + (with-out-str + (->> data-map + stats + (view-stats :print)))))))) + (testing "prints via output-view" + (is (= ["Elapsed Time: 100 ns 3σ [88.0 112] min 89.0"] + (trimmed-lines + (with-out-str + (view/stats* + :print + {} + (:data (test-data/bench-stats-map))))))) + + (is (= ["Elapsed Time: 5.00 ns 3σ [-5.39 15.4] min 1.00"] + (let [data-map (:data (test-data/samples-with-variance-12-map)) + stats (analyse/stats) + view-stats (view/stats)] + (trimmed-lines + (with-out-str + (->> data-map + stats + (view-stats :print))))))) + (is (= ["Elapsed Time: 2.50 ns 3σ [-2.70 7.70] min 0.500"] + 
(let [data-map + (-> (update-in + (:data (test-data/samples-with-variance-12-map)) + [:samples] + merge + {:batch-size 2 + :transform (#'collect-plan/batch-transforms 2)})) + stats (analyse/stats) + view-stats (view/stats)] + (trimmed-lines + (with-out-str + (->> data-map + stats + (view-stats :print)))))))))) + + +(deftest print-booststrap-stat-test + (testing "print-bootstrap-stat" + (is (= ["Elapsed Time min: 16.0 ns CI [9.00 25.0] (0.050 0.950)" + "Elapsed Time mean: 100 ns CI [95.0 105] (0.050 0.950)" + "Elapsed Time 3σ: [76.0 124] ns"] + (trimmed-lines + (with-out-str + (print/print-bootstrap-stat + {:scale 1e-9 :dimension :time :path [:elapsed-time] + :label "Elapsed Time"} + {:mean {:point-estimate 100.0 + :estimate-quantiles + [{:value 95.0 :alpha 0.05} + {:value 105.0 :alpha 0.95}]} + :variance {:point-estimate 16.0 + :estimate-quantiles + [{:value 9.0 :alpha 0.05} + {:value 25.0 :alpha 0.95}]} + :min-val {:point-estimate 16.0 + :estimate-quantiles + [{:value 9.0 :alpha 0.05} + {:value 25.0 :alpha 0.95}]} + :mean-plus-3sigma {:point-estimate 124.0 + :estimate-quantiles + [{:value 9.0 :alpha 0.05} + {:value 25.0 :alpha 0.95}]} + :mean-minus-3sigma {:point-estimate 76.0 + :estimate-quantiles + [{:value 9.0 :alpha 0.05} + {:value 25.0 :alpha 0.95}]}}))))) + (is (= ["Elapsed Time min: 1.00 ns CI [1.00 1.00] (0.025 0.975)" + "Elapsed Time mean: 1.00 ns CI [1.00 1.00] (0.025 0.975)" + "Elapsed Time 3σ: [1.00 1.00] ns"] + (let [data-map + {:samples + {:type :criterium/collected-metrics-samples + :metric->values {[:elapsed-time] [1 1 1]} + :metrics-defs (select-keys + (metrics/metrics) + [:elapsed-time]) + :transform collect-plan/identity-transforms + :batch-size 1 + :eval-count 1 + :elapsed-time 1}} + bootstrap (bootstrap/bootstrap-stats + {:quantiles [0.025 0.975] + :estimate-quantiles [0.025 0.975]}) + view (view/bootstrap-stats {})] + (trimmed-lines + (with-out-str + (->> data-map + bootstrap + (view :print))))))))) + +(deftest print-samples-test + (testing "print-samples" + (testing "prints via view" + (is (= ["Samples: 7 samples with batch-size 1" + "Elapsed Time" + "[ 6] 10.0 µs high-severe"] + (let [bench-map + (:data (test-data/samples-with-outliers-values-map)) + quantiles (analyse/quantiles {:quantiles [0.9 0.99 0.99]}) + outliers (analyse/outliers) + stats (analyse/stats) + view (view/samples)] + (trimmed-lines + (with-out-str + (->> bench-map + quantiles + outliers + stats + (view :print)))))))))) + +(deftest print-outlier-count-test + (testing "print-outlier-count" + (testing "prints all outliers when all present" + (is (= ["M: Found 10 outliers in 100 samples (10.0 %)" + "low-severe\t 1 (1.0000 %)" + "low-mild\t 2 (2.0000 %)" + "high-mild\t 3 (3.0000 %)" + "high-severe\t 4 (4.0000 %)"] + (trimmed-lines + (with-out-str (print/print-outlier-count + {:label "M"} + 100 + {:outlier-counts + (analyse/outlier-count 1 2 3 4)})))))) + (testing "prints only present outliers" + (is (= ["M: Found 5 outliers in 100 samples (5.00 %)" + "low-mild\t 2 (2.0000 %)" + "high-mild\t 3 (3.0000 %)"] + (trimmed-lines + (with-out-str + (print/print-outlier-count + {:label "M"} + 100 + {:outlier-counts + (analyse/outlier-count + 0 2 3 0)})))))) + (testing "prints via view" + (is (= ["Elapsed Time: Found 5 outliers in 1 samples (500 %)" + "low-mild\t 2 (200.0000 %)" + "high-mild\t 3 (300.0000 %)"] + (trimmed-lines + (with-out-str + (let [data-map + (:data (test-data/outlier-count-map)) + view (view/outlier-counts)] + (view :print data-map))))))))) + +(deftest print-outlier-significance-test + 
(testing "print-outlier-significance" + (testing "prints via view" + (is (= [(str "Elapsed Time Variance contribution from outliers : 25.0 %" + "Elapsed Time Variance is moderately inflated by outliers")] + (trimmed-lines + (with-out-str + ((view/outlier-significance) + :print + (:data (test-data/outlier-significance-map)))))))))) + +(deftest print-event-stats-test + (testing "print-event-stats" + (testing "prints via report" + (is (= + ["ClassLoader: loaded 1 and unloaded 1 classes in 1 samples" + "JIT compilation: ran for 3.00 ms in 1 samples" + (str "Garbage Collector: ran 2 times for a total of 1.00 ms " + "in 1 samples")] + (let [data-map (:data (test-data/samples-for-event-stats-map)) + event-stats (analyse/event-stats) + view (view/event-stats)] + (trimmed-lines + (with-out-str + (->> data-map + event-stats + (view :print)))))))))) + +(deftest print-final-gc-warnings-test + (testing "print-final-gc-warnings-test" + (testing "prints via view" + (is (= ["Final GC ran for 1.00 ms, 1.0% of total sampling time (100 ms)"] + (let [metrics-defs (-> + (select-keys + (metrics/metrics) + [:elapsed-time :class-loader :compilation]) + (assoc-in + [:garbage-collector :values] + [{:path + [:garbage-collector :total :count] + :label "GC total count" + :scale 1 + :type :event + :dimension :count} + {:path + [:garbage-collector :total :time-ms] + :label "GC total time" + :scale 1e-3 + :type :event + :dimension :time}])) + view1 (view/final-gc-warnings + {:warn-threshold 0.01 + :sampled-path [:sampled]}) + view2 (view/final-gc-warnings + {:view-type :final-gc-warnings + :warn-threshold 0.02 + :sampled-path [:sampled]}) + data-map + {:samples + {:type :criterium/collected-metrics-samples + :metric->values + {[:elapsed-time] [99999999]} + :metrics-deps metrics-defs + :batch-size 1 + :eval-count 1 + :elapsed-time 1} + :final-gc + {:type :criterium/collected-metrics-samples + :metric->values + {[:compilation :time-ms] [3] + [:garbage-collector :total :time-ms] [1] + [:elapsed-time] [1]} + :metrics-deps metrics-defs + :batch-size 1 + :eval-count 1 + :elapsed-time 1}}] + (trimmed-lines + (with-out-str + (view1 :print data-map) + (view2 :print data-map))))))))) + +(deftest print-os-test + (let [s (with-out-str ((view/os) :print {}))] + (is (str/ends-with? s "cpu(s)\n")))) + +(deftest print-runtime-test + (let [s (with-out-str ((view/runtime) :print {}))] + (is (not (str/blank? s))))) diff --git a/bases/criterium/tests.edn b/bases/criterium/tests.edn new file mode 100644 index 0000000..54326e9 --- /dev/null +++ b/bases/criterium/tests.edn @@ -0,0 +1,28 @@ +#kaocha/v1 +{:tests [{:kaocha.testable/type :kaocha.type/clojure.test + :ns-patterns ["-test$"] + :source-paths ["src"] + :test-paths ["test"] + :skip-meta [:very-slow]}] + + :plugins #profile {:default [:kaocha.plugin/randomize + :kaocha.plugin/filter + :kaocha.plugin/capture-output + :kaocha.plugin/profiling + :kaocha.plugin/cloverage] + :no-coverage [:kaocha.plugin/randomize + :kaocha.plugin/filter + :kaocha.plugin/capture-output + :kaocha.plugin/profiling]} + :cloverage/opts {} + :capture-output? #profile {:default true + :dots true + :debug false} + :reporter #profile {:dots kaocha.report/dots + :default kaocha.report/documentation + :progress kaocha.report.progress/report + :debug kaocha.view/debug} + :kaocha.plugin.randomize/randomize? true + :kaocha.plugin.profiling/count 10 + :kaocha.plugin.profiling/profiling? 
#profile {:default false + :profile true}} diff --git a/benchmarks/criterium/implementation/array.clj b/benchmarks/criterium/implementation/array.clj index 35e3468..a69b956 100644 --- a/benchmarks/criterium/implementation/array.clj +++ b/benchmarks/criterium/implementation/array.clj @@ -10,7 +10,7 @@ (let [^objects arr ret-vals-arr arr-size-1 (long (dec (count arr))) init-j (rem (dec n) max-obj-array-size)] - (time-body + (time-expr (loop [i (long (dec n)) j (long init-j) v (f)] @@ -20,6 +20,12 @@ (if (zero? j) arr-size-1 (unchecked-dec j)) (f))))))) + +(defn replace-ret-val-in-time-expr-result + [[elapsed-time _] new-ret-val] + [elapsed-time new-ret-val]) + + (defn execute-expr-core-with-array [n f reduce-with] (let [arr-size (int (min max-obj-array-size n)) @@ -30,7 +36,7 @@ v (aget ret-vals-arr i)] (if (pos? i) (recur (dec i) (reduce-with v (aget ret-vals-arr (dec i)))) - (replace-ret-val-in-time-body-result time-and-ret v))))) + (replace-ret-val-in-time-expr-result time-and-ret v))))) (defn with-array [f] (with-redefs [criterium.core/execute-expr execute-expr-core-with-array] diff --git a/benchmarks/criterium/implementation/type.clj b/benchmarks/criterium/implementation/type.clj index e0c921c..1b9f3fa 100644 --- a/benchmarks/criterium/implementation/type.clj +++ b/benchmarks/criterium/implementation/type.clj @@ -1,7 +1,7 @@ (ns criterium.implementation.type "Use a deftype to store sample results" (:use - [criterium.core :only [time-body]])) + [criterium.core :only [time-expr]])) (defprotocol MutablePlace @@ -19,7 +19,7 @@ (defn execute-expr-core-timed-part-volatile [n f] - (time-body + (time-expr (loop [i (long (dec n)) v (f)] (set-mutable-place volatile-place v) @@ -47,7 +47,7 @@ (defn execute-expr-core-timed-part-unsynchronized [n f] - (time-body + (time-expr (loop [i (long (dec n)) v (f)] (set-mutable-place unsynchronized-place v) diff --git a/build/src/build.clj b/build/src/build.clj new file mode 100644 index 0000000..a788d10 --- /dev/null +++ b/build/src/build.clj @@ -0,0 +1,103 @@ +(ns build + (:refer-clojure :exclude [test]) + (:require + [build.tasks :as tasks])) + + +;; (tasks/require +;; ns-tree +;; help) + +;; (defn ^{:params []} build +;; "Build projects" +;; [params] +;; (tasks/poly-tool +;; (merge params {:aliases [:build] +;; :exec-fn 'build +;; :exec-args {} +;; :no-propagation true +;; :elements [:projects]}))) + +(defn ^{:params []} clean + "Clean projects." + [params] + (tasks/clean params)) + +(defn ^{:params []} jar + "Build jar." + [params] + (tasks/jar params)) + +(defn ^{:params []} install + "Install jar." + [params] + (tasks/install params)) + +;; (defn ^{:params []} cljfmt +;; "Run `cljfmt check` on workspace" +;; [params] +;; (tasks/poly-main +;; (merge +;; (select-keys params [:verbose]) +;; {:aliases [:cljfmt] +;; :args ["check"]}))) + +;; (defn ^{:params []} cljfmt-fix +;; "Run `cljfmt fix` on workspace" +;; [params] +;; (merge +;; (select-keys params [:verbose]) +;; (tasks/poly-main +;; {:aliases [:cljfmt] +;; :args ["fix"]}))) + +;; (defn ^{:params []} test +;; "Run `poly test` on workspace. + +;; Note: you can run `poly test` directly." 
+;; [_params] +;; (b/process {:command-args ["poly" "test"] :out :inherit})) + +;; (defn- java-home +;; [] +;; (or (System/getProperty "java.home") ; in clojure +;; (edn/read-string +;; (:out (shell/sh "clojure" "-M" "-e" "(System/getProperty \"java.home\")"))))) + +;; (defn- os-name +;; [] +;; (let [n (System/getProperty "os.name")] +;; (cond +;; (= "Mac OS X" n) +;; "darwin" + +;; (= "Linux" n) +;; "linux" + +;; :else +;; n))) + +;; (defn compile-agent-cpp [args] +;; (let [os-name (os-name) +;; java-home (java-home)] +;; ;; temporary prn to see values on gitub +;; (prn :java-home java-home +;; :os-name os-name) +;; (process/check +;; (process/process +;; ["make"] +;; {:out :inherit +;; :err :inherit +;; :dir "agent-cpp" +;; :extra-env (cond-> {"JAVA_HOME" java-home} +;; os-name (assoc "OSNAME" os-name))})))) + +;; (defn javac-agent [args] +;; #_(tasks/poly {:on ":base:agent" +;; :task "javac"}) +;; (process/check +;; (process/process +;; ["mj" "javac"] +;; {:out :inherit +;; :err :inherit +;; :dir "bases/agent"}))) diff --git a/compile_commands.json b/compile_commands.json new file mode 100644 index 0000000..c979903 --- /dev/null +++ b/compile_commands.json @@ -0,0 +1,16 @@ +[ + { + "arguments": [ + "/usr/bin/clang", + "-c", + "-std=c++17", + "-I.", + "-I/Library/Java/JavaVirtualMachines/jdk-11.0.12.jdk/Contents/Home/include", + "-I/Library/Java/JavaVirtualMachines/jdk-11.0.12.jdk/Contents/Home/include/darwin", + "-oliballocsampler.dylib", + "sampler.cpp" + ], + "directory": "/Users/duncan/projects/hugoduncan/criterium/allocation-sampler", + "file": "/Users/duncan/projects/hugoduncan/criterium/allocation-sampler/sampler.cpp" + } +] diff --git a/deps.edn b/deps.edn new file mode 100644 index 0000000..45090f7 --- /dev/null +++ b/deps.edn @@ -0,0 +1,65 @@ +{:paths [] + :aliases + {:dev + {:extra-paths ["bases/criterium/test" "development/src"] + :extra-deps + {poly/agent {:local/root "bases/agent"} + poly/criterium {:local/root "bases/criterium"} + poly/notebooks {:local/root "bases/notebooks"} + lambdaisland/kaocha {:mvn/version "1.87.1366"} + lambdaisland/kaocha-cloverage {:mvn/version "1.1.89"} + org.clojure/clojure {:mvn/version "1.12.0"}} + :jvm-opts ["-Djdk.attach.allowAttachSelf"]} + :test {:extra-paths ["bases/criterium/test"] + :extra-deps + {local/agent {:local/root "bases/agent"} + org.clojure/test.check {:mvn/version "1.1.1"}} + :jvm-opts ["-XX:-OmitStackTraceInFastThrow" + "-Dclojure.main.report=stderr" + #_"-agentpath:agent-cpp/libcriterium.dylib"]} + :kaocha {:extra-deps + {lambdaisland/kaocha {:mvn/version "1.91.1392"} + lambdaisland/kaocha-cloverage {:mvn/version "1.1.89"} + local/agent {:local/root "bases/agent"}} + :exec-fn kaocha.runner/exec-fn + :exec-args {} + :main-opts ["-m" "kaocha.runner"]} + :platform {:exec-fn criterium.platform/exec-main + :exec-args {}} + :poly {:main-opts ["-m" "polylith.clj.core.poly-cli.core"] + :extra-deps + {polylith/clj-poly + {:git/url "https://github.com/polyfy/polylith" + :git/tag "v0.2.19" + :git/sha "e488b44" + :deps/root "projects/poly"} + org.apache.logging.log4j/log4j-api + {:mvn/version "2.20.0"} + org.apache.logging.log4j/log4j-slf4j-impl + {:mvn/version "2.20.0"} + polylith-kaocha/test-runner + {:git/url "https://github.com/imrekoszo/polylith-kaocha" + :git/tag "v0.8.4" + :git/sha "f096de8" + :deps/root "projects/test-runner"}}} + :build {;; :extra-paths ["build/src"] + :deps {local/build {:local/root "build"}} + :ns-default build.tasks + :exec-fn help + :jvm-opts ["-Dclojure.main.report=stderr"]} + :cljfmt {:deps 
{cljfmt/cljfmt {:mvn/version "0.9.0"}} + :main-opts ["-m" "cljfmt.main"]} + :outdated {:deps {com.github.liquidz/antq {:mvn/version "RELEASE"}} + :main-opts ["-m" "antq.core"]} + :jit-info {:jvm-opts ["-XX:+UnlockDiagnosticVMOptions" + "-XX:+PrintInlining"]} + :compilation {:jvm-opts ["-XX:+UnlockDiagnosticVMOptions" + "-XX:+PrintCompilation"]} + :disable-bg {:jvm-opts ["-Xbatch"]} + :parallel-gc {:jvm-opts ["-XX:+UseParallelGC"]} + :g1-gc {:jvm-opts ["-XX:+UseG1GC"]} + :epsilon-gc {:jvm-opts ["-XX:+UnlockExperimentalVMOptions" + "-XX:+UseEpsilonGC"]} + :server {:jvm-opts ["-server"]} + } + } diff --git a/design/glossary.md b/design/glossary.md new file mode 100644 index 0000000..7d34dd2 --- /dev/null +++ b/design/glossary.md @@ -0,0 +1,85 @@ +# Glossary + +## A + +**Allocation Tracking** +Monitoring and recording memory allocations during benchmark execution +using JVM tooling. + +## B + +**Batch Size** +The number of times an expression is evaluated in a single timing measurement. + +**Benchmark** +A complete performance measurement process including warmup, sample +collection, and statistical analysis. + +**Bootstrap** +A statistical resampling technique used to estimate confidence intervals +and improve accuracy of performance metrics. + +## C + +**Collect Plan** +A strategy for collecting benchmark samples, including warmup period and +measurement intervals. + +## E + +**Elapsed Time** +The total wall clock time taken to execute an expression. + +**Evaluation Count** +The total number of times an expression is evaluated during benchmarking. + +## J + +**JIT (Just-In-Time) Compilation** +The process where the JVM compiles frequently executed code into native +machine code during runtime. + +## M + +**Measured** +A wrapper that captures both timing information and expression results +during benchmarking. + +**Metrics** +Various measurements collected during benchmarking (e.g., elapsed time, +memory usage, GC activity). + +## P + +**Pipeline** +A sequence of operations for collecting and processing benchmark measurements. + +## S + +**Sample** +A single timing measurement of expression execution. + +**State** +The captured context or arguments needed to execute a benchmarked expression. + +## T + +**Transform** +Functions for converting between raw sample values and their transformed +representations. + +## V + +**Viewer** +A component responsible for presenting benchmark results in a specific +format (e.g., print, portal). + +## W + +**Warmup** +Initial execution period allowing the JVM to optimize code before taking +measurements. + +**Well RNG** +WELL (Well-Equidistributed Long-period Linear) random number generator +used for statistical sampling. 
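The glossary entries above for Batch Size, Evaluation Count, and Sample describe how a single timing measurement covers a whole batch of evaluations. A minimal Clojure sketch of that relationship follows; it is an illustration only, under the assumption that each sample is the elapsed time (in ns) for one batch — the name `per-eval-mean` is hypothetical and is not part of the criterium API.

;; Minimal sketch, assuming batched samples as described in the glossary:
;; each sample is the elapsed time (ns) for one batch of `batch-size`
;; evaluations, so the per-evaluation mean divides the summed elapsed
;; time by the total evaluation count. Names are illustrative only.
(defn per-eval-mean
  "Mean elapsed time per evaluation, in nanoseconds."
  [batch-elapsed-times batch-size]
  (let [sample-count (count batch-elapsed-times)
        eval-count   (* sample-count batch-size)] ; total evaluations
    (/ (double (reduce + batch-elapsed-times))
       eval-count)))

(comment
  ;; three samples, each timing a batch of 1000 evaluations
  (per-eval-mean [1.2e6 1.1e6 1.3e6] 1000) ;; => 1200.0 ns per evaluation
  )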
diff --git a/design/inactive/histogram.md b/design/inactive/histogram.md new file mode 100644 index 0000000..ffec512 --- /dev/null +++ b/design/inactive/histogram.md @@ -0,0 +1,43 @@ +# Histogram Core Requirements: + +## Input + +Required: Vector of numeric values +Optional: Pre-computed IQR value +Only validation is non-empty vector + + +## Bin Determination + +Use Freedman-Diaconis rule for bin width: + +width = 2 * IQR * n^(-1/3) +Calculate IQR if not provided + + +Number of bins derived from: + +Data range (max - min) +Computed bin width + + +Bin edges should exactly cover data range +All data points must be included in binning + + +## Output Format + +Return a map containing: + +Vector of bin counts +Vector of bin centers +Bin width (constant) +Vector of probability density values +Total number of samples (for verification) +Min and max values (for verification) + + +## Error Handling + +Empty input vector throws +Degenerate cases (all same value) throws diff --git a/design/inactive/streaming.md b/design/inactive/streaming.md new file mode 100644 index 0000000..91adb08 --- /dev/null +++ b/design/inactive/streaming.md @@ -0,0 +1,58 @@ +# Streaming Statistics Design Decision + +## Problem Description + +We need to efficiently estimate and represent a statistical distribution +from streaming data samples while minimizing storage requirements. The +distribution is expected to be uni-modal or bi-modal, approximately +log-normal. Outlier handling is not critical unless they appear in +significant numbers. Both quantile queries and density estimation are +important capabilities. + +## Solution Options + +Several approaches were considered for implementing streaming statistics: + +1. Full Sample Collection - Store all samples and perform + post-collection analysis +2. Reservoir Sampling - Maintain a random sample of fixed size +3. P-square - Streaming quantile estimation +4. T-digest - Adaptive clustering for distribution estimation +5. Histogram - Fixed or adaptive bin-based approximation +6. Streaming Moments - Maintain statistical moments +7. Online KDE - Kernel density estimation with merge operations + +## Decision Matrix + +Each option was evaluated on several criteria using a 1-5 scale where 5 is best: + +| Criteria | Full Sample | Reservoir | P-square | T-digest | Histogram | Streaming Moments | Online KDE | +|----------|-------------|-----------|-----------|-----------|------------|------------------|------------| +| Storage Efficiency | 1 | 4 | 4 | 4 | 3 | 5 | 3 | +| Center Accuracy | 5 | 4 | 4 | 4 | 3 | 5 | 4 | +| Spread Accuracy | 5 | 4 | 4 | 4 | 3 | 3 | 4 | +| Computational Complexity | 3 | 3 | 4 | 3 | 4 | 5 | 2 | +| Query Flexibility | 5 | 4 | 3 | 4 | 3 | 2 | 4 | +| Implementation Complexity | 5 | 4 | 2 | 2 | 4 | 3 | 2 | + +## Final Analysis + +The key contenders were narrowed to T-digest and Online KDE based on +their overall performance characteristics. 
Given the expected +distribution characteristics (uni/bi-modal, log-normal): + +T-digest advantages: +- More storage efficient for simple modal patterns +- Faster quantile queries +- Less sensitive to parameter tuning +- Better handling of gradual distribution shape changes + +Online KDE advantages: +- Natural handling of log-normal shapes +- Good at detecting and representing bimodality +- Smooth PDF estimation +- Can work directly in log-space + +For the specific requirements of both quantile queries and density +estimation, T-digest provides a better balance of efficiency and +accuracy while maintaining implementation simplicity. diff --git a/design/problem-statement.md b/design/problem-statement.md new file mode 100644 index 0000000..36c314d --- /dev/null +++ b/design/problem-statement.md @@ -0,0 +1,5 @@ +Benchmarking Clojure code requires careful consideration of JVM warmup, +garbage collection, and statistical significance. Running a function +once or using simple timing approaches can produce misleading +results. Developers need accurate, statistically sound measurements of +code performance that account for JVM dynamics. diff --git a/design/validation.md b/design/validation.md new file mode 100644 index 0000000..3326877 --- /dev/null +++ b/design/validation.md @@ -0,0 +1 @@ +Do not use spec or malli for validation. diff --git a/dev/README.md b/dev/README.md new file mode 100644 index 0000000..8b7dfa2 --- /dev/null +++ b/dev/README.md @@ -0,0 +1,4 @@ +# dev project + +This is a project that contains all source and tests, and can be used +when developing criterium. diff --git a/dev/deps.edn b/dev/deps.edn new file mode 100644 index 0000000..e0b5639 --- /dev/null +++ b/dev/deps.edn @@ -0,0 +1,15 @@ +{:deps {org.clojure/clojure {:mvn/version "1.11.2"} + criterium/criterium {:local/root "../criterium"} + criterium/chart {:local/root + "../criterium.chart"} + criterium/arg-gen {:local/root + "../criterium.arg-gen"} + lambdaisland/kaocha {:mvn/version "1.87.1366"} + lambdaisland/kaocha-cloverage {:mvn/version "1.1.89"} + com.clojure-goes-fast/clj-java-decompiler {:mvn/version "0.3.0"} + com.clojure-goes-fast/clj-memory-meter {:mvn/version "0.1.3"}} + :aliases + {:dev {} + :kaocha {:extra-paths ["../criterium/test" + "../criterium.graph/test" + "../criterium.arg-gen/test"]}}} diff --git a/development/deps.edn b/development/deps.edn new file mode 100644 index 0000000..452e5a2 --- /dev/null +++ b/development/deps.edn @@ -0,0 +1,3 @@ +{:deps {poly/criterium {:local/root "../bases/criterium"} + poly/criterium.arg-gen {:local/root "../bases/criterium.arg-gen"} + poly/criterium.chart {:local/root "../bases/criterium.chart"}}} diff --git a/development/src/user.clj b/development/src/user.clj new file mode 100644 index 0000000..793ec28 --- /dev/null +++ b/development/src/user.clj @@ -0,0 +1,4 @@ +(ns user) + +(alter-var-root #'*unchecked-math* (constantly :warn-on-boxed)) +(alter-var-root #'*warn-on-reflection* (constantly true)) diff --git a/profiles.clj b/profiles.clj deleted file mode 100644 index bacfc6b..0000000 --- a/profiles.clj +++ /dev/null @@ -1,38 +0,0 @@ -{:provided {:dependencies [[org.clojure/clojure "1.7.0-alpha3"]] - :global-vars {*unchecked-math* true}} - :1.3 {:dependencies [[org.clojure/clojure "1.3.0"]]} - :1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]} - :1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]} - :1.6 {:dependencies [[org.clojure/clojure "1.6.0"]]} - :1.7 {:dependencies [[org.clojure/clojure "1.7.0"]] - :global-vars {*unchecked-math* true}} - :1.8 
{:dependencies [[org.clojure/clojure "1.8.0"]] - :global-vars {*unchecked-math* true}} - :doc - {:dependencies [[codox-md "0.1.0"]] - :codox {:writer codox-md.writer/write-docs - :output-dir "doc/0.4/api" - :src-dir-uri "https://github.com/hugoduncan/criterium/blob/develop" - :src-linenum-anchor-prefix "L"} - :aliases {"marg" ["marg" "-d" "doc/0.4/"] - "codox" ["doc"] - "doc" ["do" "codox," "marg"]}} - :release - {:plugins [[lein-set-version "0.3.0"]] - :set-version - {:updates [{:path "README.md" :no-snapshot true}]}} - :dev {:aliases {"impl-perf" ["with-profile" "+impl" " perforate" "--quick"]} - :plugins [[codox/codox.leiningen "0.6.4"] - [lein-marginalia "0.7.1"]] - :global-vars {*warn-on-reflection* true}} - :impl {:perforate - {:environments - [{:name :array - :namespaces [criterium.implementation] - :fixtures [criterium.implementation.array/with-array]} - {:name :volatile - :namespaces [criterium.implementation] - :fixtures [criterium.implementation.type/with-volatile]} - {:name :unsynchronized - :namespaces [criterium.implementation] - :fixtures [criterium.implementation.type/with-unsynchronized]}]}}} diff --git a/project.clj b/project.clj deleted file mode 100644 index ea9f6a2..0000000 --- a/project.clj +++ /dev/null @@ -1,7 +0,0 @@ -(defproject criterium "0.4.6" - :description "Benchmarking library" - :url "https://github.com/hugoduncan/criterium" - :license {:name "Eclipse Public License" - :url "http://www.eclipse.org/legal/epl-v10.html"} - :scm {:url "git@github.com:hugoduncan/criterium.git"} - :local-repo-classpath true) diff --git a/project.edn b/project.edn new file mode 100644 index 0000000..09a6a95 --- /dev/null +++ b/project.edn @@ -0,0 +1,4 @@ +{:name "all" + :group-id "criterium" + :version-map #include "version.edn" + :version #version-string #ref [:version-map]} diff --git a/projects/agent/build.clj b/projects/agent/build.clj new file mode 100644 index 0000000..4fad65a --- /dev/null +++ b/projects/agent/build.clj @@ -0,0 +1,41 @@ +(ns build + (:require + [babashka.fs :as fs] + [makejack.tasks :as tasks])) + +(tasks/require + help + clean + compile-clj + install + project-data + read-version-file + write-version) + + +(defn load-project-and-write-version + [{:keys [dir] :or {dir "."} :as params}] + (let [path (fs/file dir ".." 
"criterium" "version.edn") + params (-> (assoc params :path path) + project-data + read-version-file + write-version)] + (println "Updating project to" (:version params)) + params)) + +(defn ^{:params []} jar + "Build jarfile" + [params] + (-> params + load-project-and-write-version + (assoc :manifest {"Agent-Class" "criterium.agent"}) + tasks/jar)) + +(defn ^{:params []} build + "Build projects" + [params] + (-> params + clean + compile-clj + jar + install)) diff --git a/projects/agent/deps.edn b/projects/agent/deps.edn new file mode 100644 index 0000000..915074b --- /dev/null +++ b/projects/agent/deps.edn @@ -0,0 +1,20 @@ +{:deps {poly/agent {:local/root "../../bases/agent"}} + :aliases + {:build {:deps {local/build {:local/root "../../build"}} + :ns-default build.tasks + :exec-fn help + :jvm-opts ["-Dclojure.main.report=stderr"]} + #_ {:extra-paths ["."] + :deps + {io.github.hugoduncan/makejack + #_ {:local/root "../../../makejack/projects/makejack-jar"} + {:git/sha "a92d409233c46250f7c5a6b3e0a0aa6dd2211de6" + :deps/root "projects/makejack-jar"}} + :ns-default build + :exec-fn help + :jvm-opts ["-Dclojure.main.report=stderr"]} + :test {:extra-paths [] + :extra-deps {poly/criterium {:local/root "../../bases/criterium"}} + :jvm-opts ["-XX:-OmitStackTraceInFastThrow" + "-Dclojure.main.report=stderr" + "-agentpath:../../agent-cpp/libcriterium.dylib"]}}} diff --git a/projects/agent/project.edn b/projects/agent/project.edn new file mode 100644 index 0000000..98d5dfe --- /dev/null +++ b/projects/agent/project.edn @@ -0,0 +1,2 @@ +{:name criterium/criterium.agent + :version "0.5.244"} diff --git a/projects/agent/readme.edn b/projects/agent/readme.edn new file mode 100644 index 0000000..738b4cc --- /dev/null +++ b/projects/agent/readme.edn @@ -0,0 +1,13 @@ +# Agent for criterium + +Prepare with: + +clj -X:deps prep + +Provides: + +object-size + + +https://github.com/apangin/jattach +jattach 13177 load instrument diff --git a/projects/criterium/deps.edn b/projects/criterium/deps.edn new file mode 100644 index 0000000..7bedcde --- /dev/null +++ b/projects/criterium/deps.edn @@ -0,0 +1,35 @@ +{:deps {poly/criterium {:local/root "../../bases/criterium"}} + :paths [] + :aliases + {:test {:extra-paths [] + :extra-deps + {org.clojure/clojure {:mvn/version "1.11.2"} + polylith-kaocha/kaocha-wrapper + {:git/url "https://github.com/imrekoszo/polylith-kaocha" + :git/tag "v0.8.4" + :git/sha "f096de8" + :deps/root "projects/kaocha-wrapper"}}} + :build {:deps {local/build {:local/root "../../build"}} + :ns-default build.tasks + :exec-fn help + :jvm-opts ["-Dclojure.main.report=stderr"]} + :platform {:main-opts ["-m" "criterium.platform"] + ;; err, why is this needed? + :extra-deps {org.clojure/test.check + {:mvn/version "1.1.1"}} + :jvm-opts ["-Dclojure.main.report=stderr" + "-Dclojure.spec.check-asserts=false" + ;; "-XX:+UnlockExperimentalVMOptions" + ;; "-XX:+UseEpsilonGC" + "-Xms1g" + "-Xmx1g" + ]} + :platform-1.8 {:main-opts ["-m" "criterium.platform"] + ;; err, why is this needed? 
+ :extra-deps {org.clojure/test.check + {:mvn/version "1.1.1"}} + :jvm-opts ["-Dclojure.main.report=stderr" + "-Dclojure.spec.check-asserts=false" + "-Xms1g" + "-Xmx1g" + ]}}} diff --git a/projects/criterium/project.edn b/projects/criterium/project.edn new file mode 100644 index 0000000..8d34276 --- /dev/null +++ b/projects/criterium/project.edn @@ -0,0 +1,2 @@ +{:name criterium/criterium + :version "0.5.{{git-rev-count}}-ALPHA"} diff --git a/src/criterium/core.clj b/src/criterium/core.clj deleted file mode 100644 index 212168d..0000000 --- a/src/criterium/core.clj +++ /dev/null @@ -1,1003 +0,0 @@ -;;;; Copyright (c) Hugo Duncan. All rights reserved. - -;;;; The use and distribution terms for this software are covered by the -;;;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) -;;;; which can be found in the file epl-v10.html at the root of this distribution. -;;;; By using this software in any fashion, you are agreeing to be bound by -;;;; the terms of this license. -;;;; You must not remove this notice, or any other, from this software. - - -;;;; Criterium - measures expression computation time over multiple invocations - -;;;; Inspired by Brent Broyer's -;;;; http://www.ellipticgroup.com/html/benchmarkingArticle.html -;;;; and also Haskell's Criterion - -;;;; Unlike java solutions, this can benchmark general expressions rather than -;;;; just functions. - -(ns ^{:author "Hugo Duncan" - :see-also - [["http://github.com/hugoduncan/criterium" "Source code"] - ["http://hugoduncan.github.com/criterium" "API Documentation"]]} - criterium.core - "Criterium measures the computation time of an expression. It is -designed to address some of the pitfalls of benchmarking, and benchmarking on -the JVM in particular. - -This includes: - - statistical processing of multiple evaluations - - inclusion of a warm-up period, designed to allow the JIT compiler to - optimise its code - - purging of gc before testing, to isolate timings from GC state prior - to testing - - a final forced GC after testing to estimate impact of cleanup on the - timing results - -Usage: - (use 'criterium.core) - (bench (Thread/sleep 1000) :verbose) - (with-progress-reporting (bench (Thread/sleep 1000) :verbose)) - (report-result (benchmark (Thread/sleep 1000)) :verbose) - (report-result (quick-bench (Thread/sleep 1000))) - -References: -See http://www.ellipticgroup.com/html/benchmarkingArticle.html for a Java -benchmarking library. The accompanying article describes many of the JVM -benchmarking pitfalls. - -See http://hackage.haskell.org/package/criterion for a Haskell benchmarking -library that applies many of the same statistical techniques." - (:use clojure.set - criterium.stats) - (:require criterium.well) - (:import (java.lang.management ManagementFactory))) - -(def ^{:dynamic true} *use-mxbean-for-times* nil) - -(def ^{:doc "Fraction of excution time allowed for final cleanup before a - warning is issued." - :dynamic true} - *final-gc-problem-threshold* 0.01) - -(def s-to-ns (* 1000 1000 1000)) ; in ns -(def ns-to-s 1e-9) ; in ns - -(def ^{:doc "Time period used to let the code run so that jit compiler can do - its work." - :dynamic true} - *warmup-jit-period* (* 10 s-to-ns)) ; in ns - -(def ^{:doc "Number of executions required" - :dynamic true} *sample-count* 60) - -(def ^{:doc "Target elapsed time for execution for a single measurement." - :dynamic true} - *target-execution-time* (* 1 s-to-ns)) ; in ns - -(def ^{:doc "Maximum number of attempts to run finalisers and gc." 
- :dynamic true} - *max-gc-attempts* 100) - -(def ^{:dynamic true} - *default-benchmark-opts* - {:max-gc-attempts *max-gc-attempts* - :samples *sample-count* - :target-execution-time *target-execution-time* - :warmup-jit-period *warmup-jit-period* - :tail-quantile 0.025 - :bootstrap-size 1000}) - -(def ^{:dynamic true} - *default-quick-bench-opts* - {:max-gc-attempts *max-gc-attempts* - :samples (/ *sample-count* 10) - :target-execution-time (/ *target-execution-time* 10) - :warmup-jit-period (/ *warmup-jit-period* 2) - :tail-quantile 0.025 - :bootstrap-size 500}) - -;;; Progress reporting -(def ^{:dynamic true} *report-progress* nil) - -(defn #^{:skip-wiki true} - progress - "Conditionally report progress to *out*." - [& message] - (when *report-progress* - (apply println message))) - -(def ^{:dynamic true} *report-debug* nil) - -(defn #^{:skip-wiki true} - debug - "Conditionally report debug to *out*." - [& message] - (when *report-debug* - (apply println message))) - -(def ^{:dynamic true} *report-warn* nil) - -(defn #^{:skip-wiki true} - warn - "Conditionally report warn to *out*." - [& message] - (when *report-warn* - (apply println "WARNING:" message))) - -;;; Java Management interface -(defprotocol StateChanged - "Interrogation of differences in a state." - (state-changed? - [state] - "Check to see if a state delta represents no change") - (state-delta - [state-1 state-2] - "Return a state object for the difference between two states")) - -(defrecord JvmClassLoaderState [loaded-count unloaded-count] - StateChanged - (state-changed? - [state] - (not (and (zero? (:loaded-count state)) (zero? (:unloaded-count state))))) - (state-delta - [state-1 state-2] - (let [vals (map - (vals state-1) (vals state-2))] - (JvmClassLoaderState. (first vals) (second vals))))) - -(defn jvm-class-loader-state [] - (let [bean (.. ManagementFactory getClassLoadingMXBean)] - (JvmClassLoaderState. (. bean getLoadedClassCount) - (. bean getUnloadedClassCount)))) - - -(defrecord JvmCompilationState [compilation-time] - StateChanged - (state-changed? - [state] - (not (zero? (:compilation-time state)))) - (state-delta - [state-1 state-2] - (let [vals (map - (vals state-1) (vals state-2))] - (JvmCompilationState. (first vals))))) - -(defn jvm-compilation-state - "Returns the total compilation time for the JVM instance." - [] - (let [bean (.. ManagementFactory getCompilationMXBean)] - (JvmCompilationState. (if (. bean isCompilationTimeMonitoringSupported) - (. bean getTotalCompilationTime) - -1)))) - -(defn jvm-jit-name - "Returns the name of the JIT compiler." - [] - (let [bean (.. ManagementFactory getCompilationMXBean)] - (. bean getName))) - -(defn os-details - "Return the operating system details as a hash." - [] - (let [bean (.. ManagementFactory getOperatingSystemMXBean)] - {:arch (. bean getArch) - :available-processors (. bean getAvailableProcessors) - :name (. bean getName) - :version (. bean getVersion)})) - -(defn runtime-details - "Return the runtime details as a hash." - [] - (let [bean (.. ManagementFactory getRuntimeMXBean) - props (. bean getSystemProperties)] - {:input-arguments (. bean getInputArguments) - :name (. bean getName) - :spec-name (. bean getSpecName) - :spec-vendor (. bean getSpecVendor) - :spec-version (. bean getSpecVersion) - :vm-name (. bean getVmName) - :vm-vendor (. bean getVmVendor) - :vm-version (. 
bean getVmVersion) - :java-version (get props "java.version") - :java-runtime-version (get props "java.runtime.version") - :sun-arch-data-model (get props "sun.arch.data.model") - :clojure-version-string (clojure-version) - :clojure-version *clojure-version*})) - -(defn system-properties - "Return the operating system details." - [] - (let [bean (.. ManagementFactory getRuntimeMXBean)] - (. bean getSystemProperties))) - -;;; OS Specific Code -(defn clear-cache-mac [] - (.. Runtime getRuntime (exec "/usr/bin/purge") waitFor)) - -(defn clear-cache-linux [] - ;; not sure how to deal with the sudo - (.. Runtime getRuntime - (exec "sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'") waitFor)) - -(defn clear-cache [] - (condp #(re-find %1 %2) (.. System getProperties (getProperty "os.name")) - #"Mac" (clear-cache-mac) - :else (warn "don't know how to clear disk buffer cache for " - (.. System getProperties (getProperty "os.name"))))) - -;;; Time reporting -(defmacro timestamp - "Obtain a timestamp" - [] `(System/nanoTime)) - -(defn timestamp-2 - "Obtain a timestamp, possibly using MXBean." - [] - (if *use-mxbean-for-times* - (.. ManagementFactory getThreadMXBean getCurrentThreadCpuTime) - (System/nanoTime))) - -;;; Execution timing -(defmacro time-body - "Returns a vector containing execution time and result of specified function." - ([expr pre] - `(do ~pre - (time-body ~expr))) - ([expr] - `(let [start# (timestamp) - ret# ~expr - finish# (timestamp)] - [(- finish# start#) ret#]))) - -(defn replace-ret-val-in-time-body-result - [[elapsed-time _] new-ret-val] - [elapsed-time new-ret-val]) - -(defmacro time-body-with-jvm-state - "Returns a vector containing execution time, change in loaded and unloaded -class counts, change in compilation time and result of specified function." - ([expr pre] - `(do ~pre - (time-body-with-jvm-state ~expr))) - ([expr] - `(let [cl-state# (jvm-class-loader-state) - comp-state# (jvm-compilation-state) - start# (timestamp) - ret# ~expr - finish# (timestamp)] - [(- finish# start#) - (merge-with - cl-state# (jvm-class-loader-state)) - (merge-with - comp-state# (jvm-compilation-state)) - ret#]))) - - -;;; Memory reporting -(defn heap-used - "Report a (inconsistent) snapshot of the heap memory used." - [] - (let [runtime (Runtime/getRuntime)] - (- (.totalMemory runtime) (.freeMemory runtime)))) - -(defn memory - "Report a (inconsistent) snapshot of the memory situation." - [] - (let [runtime (Runtime/getRuntime)] - [ (.freeMemory runtime) (.totalMemory runtime) (.maxMemory runtime)])) - -;;; Memory management -(defn force-gc - "Force garbage collection and finalisers so that execution time associated - with this is not incurred later. Up to max-attempts are made. -" - ([] (force-gc *max-gc-attempts*)) - ([max-attempts] - (debug "Cleaning JVM allocations ...") - (loop [memory-used (heap-used) - attempts 0] - (System/runFinalization) - (System/gc) - (let [new-memory-used (heap-used)] - (if (and (or (pos? (.. ManagementFactory - getMemoryMXBean - getObjectPendingFinalizationCount)) - (> memory-used new-memory-used)) - (< attempts max-attempts)) - (recur new-memory-used (inc attempts))))))) - -(defn final-gc - "Time a final clean up of JVM memory. If this time is significant compared to - the runtime, then the runtime should maybe include this time." 
- [] - (progress "Final GC...") - (first (time-body (force-gc)))) - -(defn final-gc-warn - [execution-time final-gc-time] - (progress "Checking GC...") - (let [fractional-time (/ final-gc-time execution-time) - final-gc-result [(> fractional-time *final-gc-problem-threshold*) - fractional-time - final-gc-time]] - (when (first final-gc-result) - (warn - "Final GC required" - (* 100.0 (second final-gc-result)) - "% of runtime")) - final-gc-result)) - -;;; ## Core timing loop - -;;; A mutable field is used to store the result of each function call, to -;;; prevent JIT optimising away the expression entirely. - -(defprotocol MutablePlace - "Provides a mutable place" - (set-place [_ v] "Set mutable field to value.") - (get-place [_] "Get mutable field value.")) - -(deftype Unsynchronized [^{:unsynchronized-mutable true :tag Object} v] - MutablePlace - (set-place [_ value] (set! v value)) - (get-place [_] v)) - -(def mutable-place (Unsynchronized. nil)) - -(defn execute-expr-core-timed-part - "Performs the part of execute-expr where we actually measure the elapsed run - time. Evaluates `(f)` `n` times, each time saving the return value as an - Object in `mutable-place`. - - The idea is that except for the call to (f), the only things done during each - iteration are a few arithmetic operations and comparisons to 0 on primitive - longs, and the storage of the return value. - - The JVM is not free to optimize away the calls to f because the return values - are saved in `mutable-place`." - [n f] - (time-body - (loop [i (long (dec n)) - v (f)] - (set-place mutable-place v) - (if (pos? i) - (recur (unchecked-dec i) (f)) - v)))) - -;;; ## Execution -(defn execute-expr - "Time the execution of `n` invocations of `f`. See - `execute-expr-core-timed-part`." - [n f] - (let [time-and-ret (execute-expr-core-timed-part n f)] - (get-place mutable-place) ;; just for good measure, use the mutable value - time-and-ret)) - -(defn collect-samples - [sample-count execution-count f gc-before-sample] - {:pre [(pos? sample-count)]} - (let [result (object-array sample-count)] - (loop [i (long 0)] - (if (< i sample-count) - (do - (when gc-before-sample - (force-gc)) - (aset result i (execute-expr execution-count f)) - (recur (unchecked-inc i))) - result)))) - -;;; Compilation -(defn warmup-for-jit - "Run expression for the given amount of time to enable JIT compilation." 
- [warmup-period f] - (progress "Warming up for JIT optimisations" warmup-period "...") - (let [cl-state (jvm-class-loader-state) - comp-state (jvm-compilation-state) - t (max 1 (first (time-body (f)))) - _ (debug " initial t" t) - [t n] (if (< t 100000) ; 100us - (let [n (/ 100000 t)] - [(first (execute-expr n f)) n]) - [t 1]) - p (/ warmup-period t) - c (long (max 1 (* n (/ p 5))))] - (debug " using t" t "n" n) - (debug " using execution-count" c) - (loop [elapsed (long t) - count (long n) - delta-free (long 0) - old-cl-state cl-state - old-comp-state comp-state] - (let [new-cl-state (jvm-class-loader-state) - new-comp-state (jvm-compilation-state)] - (if (not= old-cl-state new-cl-state) - (progress " classes loaded before" count "iterations")) - (if (not= old-comp-state new-comp-state) - (progress " compilation occurred before" count "iterations")) - (debug " elapsed" elapsed " count" count) - (if (and (> delta-free 2) (> elapsed warmup-period)) - [elapsed count - (state-delta new-cl-state cl-state) - (state-delta new-comp-state comp-state)] - (recur (+ elapsed (long (first (execute-expr c f)))) - (+ count c) - (if (and (= old-cl-state new-cl-state) - (= old-comp-state new-comp-state)) - (unchecked-inc delta-free) - (long 0)) - new-cl-state - new-comp-state)))))) - -;;; Execution parameters -(defn estimate-execution-count - "Estimate the number of executions required in order to have at least the - specified execution period, check for the jvm to have constant class loader - and compilation state." - [period f gc-before-sample estimated-fn-time] - (progress "Estimating execution count ...") - (debug " estimated-fn-time" estimated-fn-time) - (loop [n (max 1 (long (/ period (max 1 estimated-fn-time) 5))) - cl-state (jvm-class-loader-state) - comp-state (jvm-compilation-state)] - (let [t (ffirst (collect-samples 1 n f gc-before-sample)) - ;; It is possible for small n and a fast expression to get - ;; t=0 nsec back from collect-samples. This is likely due - ;; to how (System/nanoTime) quantizes the time on some - ;; systems. - t (max 1 t) - new-cl-state (jvm-class-loader-state) - new-comp-state (jvm-compilation-state)] - (debug " ..." n) - (when (not= comp-state new-comp-state) - (warn "new compilations in execution estimation phase")) - (if (and (>= t period) - (= cl-state new-cl-state) - (= comp-state new-comp-state)) - n - (recur (if (>= t period) - n - (min (* 2 n) (inc (long (* n (/ period t)))))) - new-cl-state new-comp-state))))) - - -;; benchmark -(defn run-benchmark - "Benchmark an expression. This tries its best to eliminate sources of error. - This also means that it runs for a while. It will typically take 70s for a - quick test expression (less than 1s run time) or 10s plus 60 run times for - longer running expressions." 
- [sample-count warmup-jit-period target-execution-time f gc-before-sample - overhead] - (force-gc) - (let [first-execution (time-body (f)) - [warmup-t warmup-n cl-state comp-state] (warmup-for-jit - warmup-jit-period f) - n-exec (estimate-execution-count - target-execution-time f gc-before-sample - (long (/ warmup-t warmup-n))) - total-overhead (long (* (or overhead 0) 1e9 n-exec)) - _ (progress "Sampling ...") - _ (debug - "Running with\n sample-count" sample-count \newline - "exec-count" n-exec \newline - "overhead[s]" overhead \newline - "total-overhead[ns]" total-overhead) - _ (force-gc) - samples (collect-samples sample-count n-exec f gc-before-sample) - final-gc-time (final-gc) - sample-times (->> samples - (map first) - (map #(- % total-overhead))) - total (reduce + 0 sample-times) - final-gc-result (final-gc-warn total final-gc-time)] - {:execution-count n-exec - :sample-count sample-count - :samples sample-times - :results (map second samples) - :total-time (/ total 1e9) - :warmup-time warmup-t - :warmup-executions warmup-n - :final-gc-time final-gc-time - :overhead overhead})) - - -(defn run-benchmarks-round-robin - "Benchmark multiple expressions in a 'round robin' fashion. Very -similar to run-benchmark, except it takes multiple expressions in a -sequence instead of only one (each element of the sequence should be a -map with keys :f and :expr-string). It runs the following steps in -sequence: - -1. Execute each expr once - -2. Run expression 1 for at least warmup-jit-period nanoseconds so the - JIT has an opportunity to optimize it. Then do the same for each - of the other expressions. - -3. Run expression 1 many times to estimate how many times it must be - executed to take a total of target-execution-time nanoseconds. The - result is a number of iterations n-exec1 for expression 1. Do the - same for each of the other expressions, each with the same - target-execution-time, each resulting in its own independent number - of executions. - -4. Run expression 1 n-exec1 times, measuring the total elapsed time. - Do the same for the rest of the expressions. - -5. Repeat step 4 a total of sample-count times." - [sample-count warmup-jit-period target-execution-time exprs gc-before-sample] - (force-gc) - (let [first-executions (map (fn [{:keys [f]}] (time-body (f))) exprs) - _ (progress (format "Warming up %d expression for %.2e sec each:" - (count exprs) (/ warmup-jit-period 1.0e9))) - warmup (vec (for [{:keys [f expr-string]} exprs] - (do (progress (format " %s..." expr-string)) - (warmup-for-jit warmup-jit-period f))))] - (progress - (format - "Estimating execution counts for %d expressions. Target execution time = %.2e sec:" - (count exprs) (/ target-execution-time 1.0e9))) - (let [exprs (map-indexed - (fn [idx {:keys [f expr-string] :as expr}] - (progress (format " %s..." expr-string)) - (let [ [warmup-t warmup-n cl-state comp-state] (get warmup idx)] - (assoc expr :index idx - :n-exec (estimate-execution-count - target-execution-time f - gc-before-sample - (long (/ warmup-t warmup-n)))))) - exprs) -;; _ (progress -;; "Running with sample-count" sample-count -;; "exec-count" n-exec ; tbd: update) - all-samples (doall - (for [i (range sample-count)] - (do - (progress - (format - " Running sample %d/%d for %d expressions:" - (inc i) sample-count (count exprs))) - (doall - (for [{:keys [f n-exec expr-string] :as expr} exprs] - (do - (progress (format " %s..." 
expr-string)) - (assoc expr - :sample (first - (collect-samples - 1 n-exec f gc-before-sample))))))))) - - ;; 'transpose' all-samples so that all samples for a - ;; particular expression are in a sequence together, and - ;; all-samples is a sequence of one map per expression. - all-samples (group-by :index (apply concat all-samples)) - all-samples - (map (fn [[idx data-seq]] - (let [expr (dissoc (first data-seq) :sample) - n-exec (:n-exec expr) - samples (map :sample data-seq) - final-gc-time (final-gc) - sample-times (map first samples) - total (reduce + 0 sample-times) - ;; TBD: Doesn't make much sense to attach final - ;; GC warning to the expression that happened - ;; to be first in the sequence, but that is - ;; what this probably does right now. Think - ;; what might be better to do. - final-gc-result (final-gc-warn total final-gc-time)] - {:execution-count n-exec - :sample-count sample-count - :samples sample-times - :results (map second samples) - :total-time (/ total 1e9)})) - all-samples)] - all-samples))) - - -(defn bootstrap-bca - "Bootstrap a statistic. Statistic can produce multiple statistics as a vector - so you can use juxt to pass multiple statistics. - http://en.wikipedia.org/wiki/Bootstrapping_(statistics)" - [data statistic size alpha rng-factory] - (progress "Bootstrapping ...") - (let [bca (bca-nonparametric data statistic size alpha rng-factory)] - (if (vector? bca) - (bca-to-estimate alpha bca) - (map (partial bca-to-estimate alpha) bca)))) - -(defn bootstrap - "Bootstrap a statistic. Statistic can produce multiple statistics as a vector - so you can use juxt to pass multiple statistics. - http://en.wikipedia.org/wiki/Bootstrapping_(statistics)" - [data statistic size rng-factory] - (progress "Bootstrapping ...") - (let [samples (bootstrap-sample data statistic size rng-factory) - transpose (fn [data] (apply map vector data))] - (if (vector? (first samples)) - (map bootstrap-estimate samples) - (bootstrap-estimate samples)))) - -;;; Outliers - -(defn outlier-effect - "Return a keyword describing the effect of outliers on the estimate of mean - runtime." - [var-out-min] - (cond - (< var-out-min 0.01) :unaffected - (< var-out-min 0.1) :slight - (< var-out-min 0.5) :moderate - :else :severe)) - -(defn point-estimate [estimate] - (first estimate)) - -(defn point-estimate-ci [estimate] - (last estimate)) - -(defn outlier-significance - "Find the significance of outliers given bootstrapped mean and variance -estimates. -See http://www.ellipticgroup.com/misc/article_supplement.pdf, p17." - [mean-estimate variance-estimate n] - (progress "Checking outlier significance") - (let [mean-block (point-estimate mean-estimate) - variance-block (point-estimate variance-estimate) - std-dev-block (Math/sqrt variance-block) - mean-action (/ mean-block n) - mean-g-min (/ mean-action 2) - sigma-g (min (/ mean-g-min 4) (/ std-dev-block (Math/sqrt n))) - variance-g (* sigma-g sigma-g) - c-max (fn [t-min] - (let [j0 (- mean-action t-min) - k0 (- (* n n j0 j0)) - k1 (+ variance-block (- (* n variance-g)) (* n j0 j0)) - det (- (* k1 k1) (* 4 variance-g k0))] - (Math/floor (/ (* -2 k0) (+ k1 (Math/sqrt det)))))) - var-out (fn [c] - (let [nmc (- n c)] - (* (/ nmc n) (- variance-block (* nmc variance-g))))) - min-f (fn [f q r] - (min (f q) (f r))) - ] - (/ (min-f var-out 1 (min-f c-max 0 mean-g-min)) variance-block))) - - -(defrecord OutlierCount [low-severe low-mild high-mild high-severe]) - -(defn outlier-count - [low-severe low-mild high-mild high-severe] - (OutlierCount. 
low-severe low-mild high-mild high-severe)) - - -(defn add-outlier [low-severe low-mild high-mild high-severe counts x] - (outlier-count - (if (<= x low-severe) - (inc (:low-severe counts)) - (:low-severe counts)) - (if (< low-severe x low-mild) - (inc (:low-mild counts)) - (:low-mild counts)) - (if (> high-severe x high-mild) - (inc (:high-mild counts)) - (:high-mild counts)) - (if (>= x high-severe) - (inc (:high-severe counts)) - (:high-severe counts)))) - -(defn outliers - "Find the outliers in the data using a boxplot technique." - [data] - (progress "Finding outliers ...") - (reduce (apply partial add-outlier - (apply boxplot-outlier-thresholds - ((juxt first last) (quartiles (sort data))))) - (outlier-count 0 0 0 0) - data)) - -;;; overhead estimation -(declare benchmark*) - -(defn estimate-overhead - "Calculate a conservative estimate of the timing loop overhead." - [] - (-> (benchmark* - (fn [] 0) - {:warmup-jit-period (* 10 s-to-ns) - :samples 10 - :target-execution-time (* 0.5 s-to-ns) - :overhead 0 - :supress-jvm-option-warnings true}) - :lower-q - first)) - -(def estimated-overhead-cache nil) - -(defn estimated-overhead! - "Sets the estimated overhead." - [] - (progress "Estimating sampling overhead") - (alter-var-root - #'estimated-overhead-cache (constantly (estimate-overhead)))) - -(defn estimated-overhead - [] - (or estimated-overhead-cache - (estimated-overhead!))) - -;;; options -(defn extract-report-options - "Extract reporting options from the given options vector. Returns a two - element vector containing the reporting options followed by the non-reporting - options" - [opts] - (let [known-options #{:os :runtime :verbose} - option-set (set opts)] - [(intersection known-options option-set) - (remove #(contains? known-options %1) opts)])) - -(defn add-default-options [options defaults] - (let [time-periods #{:warmup-jit-period :target-execution-time}] - (merge defaults - (into {} (map #(if (contains? time-periods (first %1)) - [(first %1) (* (second %1) s-to-ns)] - %1) - options))))) - -;;; User top level functions -(defmacro with-progress-reporting - "Macro to enable progress reporting during the benchmark." 
- [expr] - `(binding [*report-progress* true] - ~expr)) - -(defn benchmark-stats [times opts] - (let [outliers (outliers (:samples times)) - tail-quantile (:tail-quantile opts) - stats (bootstrap-bca - (map double (:samples times)) - (juxt - mean - variance - (partial quantile tail-quantile) - (partial quantile (- 1.0 tail-quantile))) - (:bootstrap-size opts) [0.5 tail-quantile (- 1.0 tail-quantile)] - criterium.well/well-rng-1024a) - analysis (outlier-significance (first stats) (second stats) - (:sample-count times)) - sqr (fn [x] (* x x)) - m (mean (map double (:samples times))) - s (Math/sqrt (variance (map double (:samples times))))] - (merge times - {:outliers outliers - :mean (scale-bootstrap-estimate - (first stats) (/ 1e-9 (:execution-count times))) - :sample-mean (scale-bootstrap-estimate - [m [(- m (* 3 s)) (+ m (* 3 s))]] - (/ 1e-9 (:execution-count times))) - :variance (scale-bootstrap-estimate - (second stats) (sqr (/ 1e-9 (:execution-count times)))) - :sample-variance (scale-bootstrap-estimate - [ (sqr s) [0 0]] - (sqr (/ 1e-9 (:execution-count times)))) - :lower-q (scale-bootstrap-estimate - (nth stats 2) (/ 1e-9 (:execution-count times))) - :upper-q (scale-bootstrap-estimate - (nth stats 3) (/ 1e-9 (:execution-count times))) - :outlier-variance analysis - :tail-quantile (:tail-quantile opts) - :os-details (os-details) - :options opts - :runtime-details (-> - (runtime-details) - (update-in [:input-arguments] vec))}))) - -(defn warn-on-suspicious-jvm-options - "Warn if the JIT options are suspicious looking." - [] - (let [compiler (jvm-jit-name) - {:keys [input-arguments]} (runtime-details)] - (when-let [arg (and (re-find #"Tiered" compiler) - (some #(re-find #"TieredStopAtLevel=(.*)" %) - input-arguments))] - (warn - "JVM argument" (first arg) "is active," - "and may lead to unexpected results as JIT C2 compiler may not be active." - "See http://www.slideshare.net/CharlesNutter/javaone-2012-jvm-jit-for-dummies.")))) - -(defn benchmark* - "Benchmark a function. This tries its best to eliminate sources of error. - This also means that it runs for a while. It will typically take 70s for a - fast test expression (less than 1s run time) or 10s plus 60 run times for - longer running expressions." - [f {:keys [samples warmup-jit-period target-execution-time gc-before-sample - overhead supress-jvm-option-warnings] :as options}] - (when-not supress-jvm-option-warnings - (warn-on-suspicious-jvm-options)) - (let [{:keys [samples warmup-jit-period target-execution-time - gc-before-sample overhead] :as opts} - (merge *default-benchmark-opts* - {:overhead (or overhead (estimated-overhead))} - options) - times (run-benchmark samples warmup-jit-period target-execution-time f - gc-before-sample overhead)] - (benchmark-stats times opts))) - -(defn benchmark-round-robin* - [exprs options] - (let [opts (merge *default-benchmark-opts* options) - times (run-benchmarks-round-robin - (:samples opts) - (:warmup-jit-period opts) - (:target-execution-time opts) - exprs - (:gc-before-sample opts))] - (map #(benchmark-stats % opts) times))) - -(defmacro benchmark - "Benchmark an expression. This tries its best to eliminate sources of error. - This also means that it runs for a while. It will typically take 70s for a - fast test expression (less than 1s run time) or 10s plus 60 run times for - longer running expressions." 
- [expr options] - `(benchmark* (fn [] ~expr) ~options)) - -(defmacro benchmark-round-robin - [exprs options] - (let [wrap-exprs (fn [exprs] - (cons 'list - (map (fn [expr] - {:f `(fn [] ~expr) - :expr-string (str expr)}) - exprs)))] - `(benchmark-round-robin* ~(wrap-exprs exprs) ~options))) - -(defn quick-benchmark* - "Benchmark an expression. Less rigorous benchmark (higher uncertainty)." - [f {:as options}] - (benchmark* f (merge *default-quick-bench-opts* options))) - -(defmacro quick-benchmark - "Benchmark an expression. Less rigorous benchmark (higher uncertainty)." - [expr options] - `(quick-benchmark* (fn [] ~expr) ~options)) - -(defn report - "Print format output" - [format-string & values] - (print (apply format format-string values))) - -(defn scale-time - "Determine a scale factor and unit for displaying a time." - [measurement] - (cond - (> measurement 60) [(/ 60) "min"] - (< measurement 1e-6) [1e9 "ns"] - (< measurement 1e-3) [1e6 "µs"] - (< measurement 1) [1e3 "ms"] - :else [1 "sec"])) - -(defn format-value [value scale unit] - (format "%f %s" (* scale value) unit)) - -(defn report-estimate - [msg estimate significance] - (let [mean (first estimate) - [factor unit] (scale-time mean)] - (apply - report "%32s : %s %2.1f%% CI: (%s, %s)\n" - msg - (format-value mean factor unit) - (* significance 100) - (map #(format-value % factor unit) (last estimate))))) - -(defn report-point-estimate - ([msg estimate] - (let [mean (first estimate) - [factor unit] (scale-time mean)] - (report "%32s : %s\n" msg (format-value mean factor unit)))) - ([msg estimate quantile] - (let [mean (first estimate) - [factor unit] (scale-time mean)] - (report - "%32s : %s (%4.1f%%)\n" - msg (format-value mean factor unit) (* quantile 100))))) - -(defn report-estimate-sqrt - [msg estimate significance] - (let [mean (Math/sqrt (first estimate)) - [factor unit] (scale-time mean)] - (apply - report "%32s : %s %2.1f%% CI: (%s, %s)\n" - msg - (format-value mean factor unit) - (* significance 100) - (map #(format-value (Math/sqrt %) factor unit) (last estimate))))) - -(defn report-point-estimate-sqrt - [msg estimate] - (let [mean (Math/sqrt (first estimate)) - [factor unit] (scale-time mean)] - (report "%32s : %s\n" msg (format-value mean factor unit)))) - -(defn report-outliers [results] - (let [outliers (:outliers results) - values (vals outliers) - labels {:unaffected "unaffected" - :slight "slightly inflated" - :moderate "moderately inflated" - :severe "severely inflated"} - sample-count (:sample-count results) - types ["low-severe" "low-mild" "high-mild" "high-severe"]] - (when (some pos? values) - (let [sum (reduce + values)] - (report - "\nFound %d outliers in %d samples (%2.4f %%)\n" - sum sample-count (* 100.0 (/ sum sample-count)))) - (doseq [[v c] (partition 2 (interleave (filter pos? 
values) types))] - (report "\t%s\t %d (%2.4f %%)\n" c v (* 100.0 (/ v sample-count)))) - (report " Variance from outliers : %2.4f %%" - (* (:outlier-variance results) 100.0)) - (report " Variance is %s by outliers\n" - (-> (:outlier-variance results) outlier-effect labels))))) - -(defn report-result [results & opts] - (let [verbose (some #(= :verbose %) opts) - show-os (or verbose (some #(= :os %) opts)) - show-runtime (or verbose (some #(= :runtime %) opts))] - (when show-os - (apply println - (-> (map - #(%1 (:os-details results)) - [:arch :name :version :available-processors]) - vec (conj "cpu(s)")))) - (when show-runtime - (let [runtime-details (:runtime-details results)] - (apply println (map #(%1 runtime-details) [:vm-name :vm-version])) - (apply println "Runtime arguments:" - (:input-arguments runtime-details)))) - (println "Evaluation count :" (* (:execution-count results) - (:sample-count results)) - "in" (:sample-count results) "samples of" - (:execution-count results) "calls.") - - (when verbose - (report-point-estimate - "Execution time sample mean" (:sample-mean results))) - (report-point-estimate "Execution time mean" (:mean results)) - (when verbose - (report-point-estimate-sqrt - "Execution time sample std-deviation" (:sample-variance results))) - (report-point-estimate-sqrt - "Execution time std-deviation" (:variance results)) - (report-point-estimate - "Execution time lower quantile" - (:lower-q results) (:tail-quantile results)) - (report-point-estimate - "Execution time upper quantile" - (:upper-q results) (- 1.0 (:tail-quantile results))) - (when-let [overhead (:overhead results)] - (when (pos? overhead) - (report-point-estimate "Overhead used" [overhead]))) - (report-outliers results))) - -(defmacro bench - "Convenience macro for benchmarking an expression, expr. Results are reported - to *out* in human readable format. Options for report format are: :os, -:runtime, and :verbose." - [expr & opts] - (let [[report-options options] (extract-report-options opts)] - `(report-result - (benchmark - ~expr - ~(when (seq options) (apply hash-map options))) - ~@report-options))) - -(defmacro quick-bench - "Convenience macro for benchmarking an expression, expr. Results are reported -to *out* in human readable format. Options for report format are: :os, -:runtime, and :verbose." - [expr & opts] - (let [[report-options options] (extract-report-options opts)] - `(report-result - (quick-benchmark - ~expr - ~(when (seq options) (apply hash-map options))) - ~@report-options))) diff --git a/src/criterium/stats.clj b/src/criterium/stats.clj deleted file mode 100644 index 3931560..0000000 --- a/src/criterium/stats.clj +++ /dev/null @@ -1,348 +0,0 @@ -;;;; Copyright (c) Hugo Duncan. All rights reserved. - -;;;; The use and distribution terms for this software are covered by the -;;;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) -;;;; which can be found in the file epl-v10.html at the root of this distribution. -;;;; By using this software in any fashion, you are agreeing to be bound by -;;;; the terms of this license. -;;;; You must not remove this notice, or any other, from this software. - -;;;; A collection of statistical methods used by criterium - -(ns criterium.stats) - -;; (set! *warn-on-reflection* true) - -;;; Utilities -(defn transpose - "Transpose a vector of vectors." - [data] - (if (vector? 
(first data)) - (apply map vector data) - data)) - -(defn sqr - "Square of argument" - [x] (* x x)) - -(defn cube - "Cube of argument" - [x] (* x x x)) - - -;;; Statistics -(defn mean - "Arithmetic mean of data." - [data] - (/ (reduce + data) (count data))) - -(defn sum - "Sum of each data point." - [data] (reduce + data)) - -(defn sum-of-squares - "Sum of the squares of each data point." - [data] - (reduce - (fn [s v] - (+ s (* v v))) 0.0 data)) - -(defn variance - "Sample variance. Returns variance. - Ref: Chan et al. Algorithms for computing the sample variance: analysis and - recommendations. American Statistician (1983)." - ([data] (variance data 1)) - ([data df] - ;; Uses a single pass, non-pairwise algorithm, without shifting. - (letfn [(update-estimates [[m q k] x] - [(+ m (/ (- x m) (inc k))) - (+ q (/ (* k (sqr (- x m))) (inc k))) - (inc k)])] - (let [[m q k] (reduce update-estimates [0.0 0.0 0.0] data)] - (/ q (- k df)))))) - -;; For the moment we take the easy option of sorting samples -(defn median - "Calculate the median of a sorted data set - References: http://en.wikipedia.org/wiki/Median" - [data] - (let [n (count data) - i (bit-shift-right n 1)] - (if (even? n) - [(/ (+ (nth data (dec i)) (nth data i)) 2) - (take i data) - (drop i data)] - [(nth data (bit-shift-right n 1)) - (take i data) - (drop (inc i) data)]))) - -(defn quartiles - "Calculate the quartiles of a sorted data set - References: http://en.wikipedia.org/wiki/Quartile" - [data] - (let [[m lower upper] (median data)] - [(first (median lower)) m (first (median upper))])) - -(defn quantile - "Calculate the quantile of a sorted data set - References: http://en.wikipedia.org/wiki/Quantile" - [quantile data] - (let [n (dec (count data)) - interp (fn [x] - (let [f (Math/floor x) - i (long f) - p (- x f)] - (+ (* p (nth data (inc i))) (* (- 1.0 p) (nth data i)))))] - (interp (* quantile n)))) - -(defn boxplot-outlier-thresholds - "Outlier thresholds for given quartiles." - [q1 q3] - (let [iqr (- q3 q1) - severe (* iqr 3) - mild (* iqr 1.5)] - [(- q1 severe) - (- q1 mild) - (+ q3 mild) - (+ q3 severe)])) - - -(defn uniform-distribution - "Return uniformly distributed deviates on 0..max-val using the specified rng." - [max-val rng] - (map (fn [x] (* x max-val)) rng)) - -(defn sample-uniform - "Provide n samples from a uniform distribution on 0..max-val" - [n max-val rng] - (take n (uniform-distribution max-val rng))) - -(defn sample - "Sample with replacement." - [x rng] - (let [n (count x)] - (map #(nth x %1) (sample-uniform n n rng)))) - -(defn bootstrap-sample - "Bootstrap sampling of a statistic, using resampling with replacement." - [data statistic size rng-factory] - (transpose - (for [_ (range size)] (statistic (sort (sample data (rng-factory))))))) - -(defn confidence-interval - "Compute the 95% confidence interval given bootstrapped mean and variance - estimates. This uses the bootstrapped statistic's variance, but we should use - BCa or ABC." - [mean variance] - (let [n-sigma 1.96 ; use 95% confidence interval - delta (* n-sigma (Math/sqrt variance))] - [(- mean delta) (+ mean delta)])) - -(defn bootstrap-estimate - "Mean, variance and confidence interval. This uses the bootstrapped - statistic's variance for the confidence interval, but we should use BCa or - ABC." 
- [sampled-stat] - (let [stats ((juxt mean variance ) sampled-stat)] - (conj stats (apply confidence-interval stats)))) - -(defn scale-bootstrap-estimate [estimate scale] - [(* (first estimate) scale) - (map #(* scale %1) (last estimate))]) - -(defn polynomial-value - "Evaluate a polynomial at the given value x, for the coefficients given in -descending order (so the last element of coefficients is the constant term)." - [x coefficients] - (reduce #(+ (* x %1) %2) (first coefficients) (rest coefficients))) - -(defn erf - "erf polynomial approximation. Maximum error is 1.5e-7. - Handbook of Mathematical Functions: with Formulas, Graphs, and Mathematical - Tables. Milton Abramowitz (Editor), Irene A. Stegun (Editor), 7.1.26" - [x] - (let [x (double x) - sign (Math/signum x) - x (Math/abs x) - a [1.061405429 -1.453152027 1.421413741 -0.284496736 0.254829592 0.0] - p 0.3275911 - t (/ (+ 1.0 (* p x))) - value (- 1.0 (* (polynomial-value t a) (Math/exp (- (* x x)))))] - (* sign value))) - -(defn normal-cdf - "Probability p(X estimate) samples)) size)) - jack-mean (mean jack-samples) - jack-deviation (map #(- jack-mean %1) jack-samples) - acc (/ (reduce + 0.0 (map cube jack-deviation)) - (* 6.0 (Math/pow (reduce + 0.0 (map sqr jack-deviation)) 1.5))) - tt (map - #(normal-cdf (+ z0 (/ (+ z0 %1) (- 1.0 (* acc (+ z0 %1)))))) - z-alpha) - ooo (map #(trunc (* %1 size)) tt) - sorted-samples (sort samples) - confpoints (map (partial nth sorted-samples) ooo)] - [confpoints z0 acc jack-mean jack-samples])) - -(defn bca-nonparametric - "Non-parametric BCa estimate of a statistic on data. Size bootstrap samples - are used. Confidence values are returned at the alpha normal - quantiles. rng-factory is a method that returns a random number generator to - use for the sampling. - - An introduction to the bootstrap. Efron, B., & Tibshirani, R. J. (1993). - - See http://lib.stat.cmu.edu/S/bootstrap.funs for Efron's original - implementation." - [data statistic size alpha rng-factory] - (let [n (count data) - data (sort data) - estimate (statistic data) - samples (bootstrap-sample data statistic size rng-factory) - jack-samples (jacknife data statistic) - alpha (if (vector? alpha) alpha [alpha]) - z-alpha (map normal-quantile alpha)] - (if (vector? estimate) - (map - (partial bca-nonparametric-eval n size data z-alpha) - estimate samples jack-samples) - (bca-nonparametric-eval - n size data z-alpha estimate samples jack-samples)))) - -(defn bca-to-estimate [alpha bca-estimate] - [(first (first bca-estimate)) (next (first bca-estimate))]) - - - -;;; Nonparametric assessment of multimodality for univariate data. -;;; Salgado-Ugarte IH, Shimizu M. 1998 - -;;; Maximum likelihood kernel density estimation: On the potential of convolution sieves. -;;; Jones and Henderson. Computational Statistics and Data Analysis (2009) - -(defn modal-estimation-constant - "Kernel function for estimation of multi-modality. - h-k is the critical bandwidth, sample-variance is the observed sample variance. - Equation 7, Nonparametric assessment of multimodality for univariate - data. Salgado-Ugarte IH, Shimizu M" - [h-k sample-variance] - (Math/sqrt (+ 1 (/ (sqr h-k) sample-variance)))) - -(defn smoothed-sample - "Smoothed estimation function." - [c-k h-k data deviates] - (lazy-seq - (cons - (* c-k (+ (take 1 data) (* h-k (take 1 deviates)))) - (if-let [n (next data)] - (smoothed-sample c-k h-k n (next deviates)))))) - -(defn gaussian-weight - "Weight function for gaussian kernel." 
- [t] - (let [k (Math/pow (* 2 Math/PI) -0.5)] - (* k (Math/exp (/ (* t t) -2))))) - -(defn kernel-density-estimator - "Kernel density estimator for x, given n samples X, weights K and width h." - [h K n X x] - (/ (reduce #(+ %1 (K (/ (- x %2) h))) 0 X) (* n h))) diff --git a/src/criterium/well.clj b/src/criterium/well.clj deleted file mode 100644 index d96ff36..0000000 --- a/src/criterium/well.clj +++ /dev/null @@ -1,78 +0,0 @@ -;;;; Copyright (c) Hugo Duncan. All rights reserved. - -;;;; The use and distribution terms for this software are covered by the -;;;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) -;;;; which can be found in the file epl-v10.html at the root of this distribution. -;;;; By using this software in any fashion, you are agreeing to be bound by -;;;; the terms of this license. -;;;; You must not remove this notice, or any other, from this software. - -;;;; Improved Long-Period Generators Based on Linear Recurrences Modulo 2 -;;;; F. Panneton, P. L'Ecuyer and M. Matsumoto -;;;; http://www.iro.umontreal.ca/~panneton/WELLRNG.html - -(ns criterium.well) - -;;; Macros to help convert unsigned algorithm to our implementation with signed -;;; integers. -;;; unsign is used to convert the [0.5,-0.5] range back onto [1,0] -(defmacro bit-shift-right-ns - "A bit shift that doesn't do sign extension." - [a b] - `(let [n# ~b] - (if (neg? n#) - (bit-shift-left ~a (- n#)) - (bit-and - (bit-shift-right Integer/MAX_VALUE (dec n#)) - (bit-shift-right ~a n#))))) - -(defmacro unsign - "Convert a result based on a signed integer, and convert it to what it would - have been for an unsigned integer." - [x] - `(let [v# ~x] - (if (neg? v#) (+ 1 v#) v#))) - -(def int-max (bit-or (bit-shift-left Integer/MAX_VALUE 1) 1)) - -(defmacro limit-bits [x] - `(bit-and int-max ~x)) - -(defmacro mat0-pos [t v] - `(let [v# ~v] (bit-xor v# (bit-shift-right v# ~t)))) - -(defmacro mat0-neg [t v] - `(let [v# ~v] - (long (bit-xor v# (limit-bits (bit-shift-left v# (- ~t))))))) - -(defmacro add-mod-32 [a b] - `(long (bit-and (+ ~a ~b) 0x01f))) - -(defn well-rng-1024a - "Well RNG 1024a -See: Improved Long-Period Generators Based on Linear Recurrences Modulo 2 -F. Panneton, P. L'Ecuyer and M. Matsumoto -http://www.iro.umontreal.ca/~panneton/WELLRNG.html" - ([] (well-rng-1024a - (long-array 32 (repeatedly 32 #(rand-int Integer/MAX_VALUE))) - (rand-int 32))) - ([^longs state ^long index] - {:pre [(<= 0 index 32)]} - (let [m1 3 - m2 24 - m3 10 - fact 2.32830643653869628906e-10 - new-index (add-mod-32 index 31) - z0 (aget state new-index) - z1 (bit-xor (aget state index) - (mat0-pos 8 (aget state (add-mod-32 index m1)))) - z2 (bit-xor (mat0-neg -19 (aget state (add-mod-32 index m2))) - (mat0-neg -14 (aget state (add-mod-32 index m3))))] - (aset state index (bit-xor z1 z2)) - (aset state new-index - (bit-xor (bit-xor (mat0-neg -11 z0) (mat0-neg -7 z1)) - (mat0-neg -13 z2))) - (let [] - (lazy-seq - (cons (unsign (* (aget state new-index) fact)) - (well-rng-1024a state new-index))))))) diff --git a/src/criterium/ziggurat.clj b/src/criterium/ziggurat.clj deleted file mode 100644 index 0a15a78..0000000 --- a/src/criterium/ziggurat.clj +++ /dev/null @@ -1,102 +0,0 @@ -;;;; Copyright (c) Hugo Duncan. All rights reserved. - -;;;; The use and distribution terms for this software are covered by the -;;;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) -;;;; which can be found in the file epl-v10.html at the root of this distribution. 
-;;;; By using this software in any fashion, you are agreeing to be bound by -;;;; the terms of this license. -;;;; You must not remove this notice, or any other, from this software. - -;;; Implementation of ZIGNOR -;;; An improved Ziggurat method to generate normal random samples, Doornik, 2005 - -(ns criterium.ziggurat - (:require criterium.well)) - -(def ^:dynamic *zignor-c* 128 ) ; "Number of blocks." - ; "Start of the right tail" (R * phi(R) + Pr(X>=R)) * sqrt(2\pi) -(def ^:dynamic *zignor-r* 3.442619855899e0) -(def ^:dynamic *zignor-v* 9.91256303526217e-3) - -(defn- sqr [x] (* x x)) - -(defn zignor-init - "Initialise tables." - [c r v] - (let [c (int c) - r (double r) - v (double v) - #^doubles s-adzigx (double-array (inc c)) - #^doubles s-adzigr (double-array c) - f (Math/exp (* -0.5e0 r r))] - (aset s-adzigx 0 (/ v f)) ;; [0] is bottom block: V / f(R) - (aset s-adzigx 1 r) - (aset s-adzigx c (double 0.0)) - (loop [i (int 2) - f f] - (aset s-adzigx i - (Math/sqrt (* -2e0 (Math/log (+ (/ v (aget s-adzigx (dec i))) f))))) - (when (< i c) - (recur - (inc i) - (Math/exp (* -0.5e0 (aget s-adzigx i) (aget s-adzigx i)))))) - - (for [#^Integer i (range c)] - (let [j (int i)] - (aset s-adzigr j (/ (aget s-adzigx (inc j)) (aget s-adzigx j))))) - [s-adzigr s-adzigx r (dec c)])) - - -(defn random-normal-zig - "Pseudo-random normal variates. -An implementation of ZIGNOR -See: - An improved Ziggurat method to generate normal random samples, Doornik, 2005" - ([] - (random-normal-zig (criterium.well/well-rng-1024a) - (zignor-init *zignor-c* *zignor-r* *zignor-v*))) - ([rng-seq] - (random-normal-zig rng-seq (zignor-init *zignor-c* *zignor-r* *zignor-v*))) - ([rng-seq c r v] (random-normal-zig rng-seq (zignor-init c r v))) - ([c r v] - (random-normal-zig (criterium.well/well-rng-1024a) (zignor-init c r v))) - ([rng-seq [#^doubles s-adzigr #^doubles s-adzigx zignor-r mask]] - (letfn [(random-normal-tail - [min negative rng-seq] - (loop [rng-seq rng-seq] - (let [x (/ (Math/log (first rng-seq)) min) - y (Math/log (first (next rng-seq)))] - (if (>= (* -2e0 y) (* x x)) - (if negative - [(- x min) (drop 2 rng-seq)] - [(- min x) (drop 2 rng-seq)]) - (recur (drop 2 rng-seq))))))] - (let [[deviate rng-seq] - (loop [rng-seq rng-seq] - (let [r (first rng-seq) - u (double (- (* 2e0 r) 1e0)) - i (bit-and - (int (* Integer/MAX_VALUE (first (drop 1 rng-seq)))) - mask)] - ;; first try the rectangular boxes - (if (< (Math/abs u) (nth s-adzigr i)) - [(* u (nth s-adzigx i)) (drop 2 rng-seq)] - - ;; bottom box: sample from the tail - (if (zero? i) - (random-normal-tail zignor-r (neg? u) (drop 2 rng-seq)) - - ;; is this a sample from the wedges? 
- (let [x (* u (nth s-adzigx i)) - f0 (Math/exp - (* -0.5e0 - (- (Math/pow (nth s-adzigx i) 2) (sqr x)))) - f1 (Math/exp - (* -0.5e0 - (- (Math/pow (nth s-adzigx (inc i)) 2) - (sqr x))))] - (if (< (+ f1 (* (first (drop 2 rng-seq) ) (- f0 f1))) - 1.0) - [x (drop 3 rng-seq)] - (recur (drop 3 rng-seq) )))))))] - (lazy-seq (cons deviate (random-normal-zig rng-seq))))))) diff --git a/test/criterium/core_test.clj b/test/criterium/core_test.clj deleted file mode 100644 index 24574e1..0000000 --- a/test/criterium/core_test.clj +++ /dev/null @@ -1,61 +0,0 @@ -(ns criterium.core-test - (:use [criterium.core] :reload-all) - (:use [clojure.test :only [deftest is]]) - (:require criterium.stats - criterium.well)) - - -(deftest outliers-test - (is (= (outlier-count 0 0 0 0) - (outliers [1 2 5 7 8]))) - (is (= (outlier-count 0 0 0 0) - (outliers [1 2 2 5 7 8]))) - (is (= (outlier-count 1 0 0 0) - (outliers [-100 1 2 5 7 8 9]))) - (is (= (outlier-count 0 1 0 0) - (outliers [-10 1 2 5 7 8 9]))) - (is (= (outlier-count 0 0 1 0) - (outliers [1 1 2 5 7 8 22]))) - (is (= (outlier-count 0 0 0 1) - (outliers [1 1 2 5 7 8 100])))) - -(deftest outlier-effect-test - (is (= :unaffected (outlier-effect 0.009))) - (is (= :slight (outlier-effect 0.09))) - (is (= :moderate (outlier-effect 0.49))) - (is (= :severe (outlier-effect 0.51)))) - -(deftest outlier-significance-test - ;; http://www.ellipticgroup.com/misc/article_supplement.pdf, p22 - (is (= 0.9960022873987793 - (outlier-significance - [1.395522860870968 []] - [(* 0.0013859776344426547 0.0013859776344426547) []] - 67108864)))) - -(deftest bootstrap-test - (is (= [1 0.0 [1.0 1.0]] - (bootstrap (take 20 (repeatedly (constantly 1))) - criterium.stats/mean - 100 - criterium.well/well-rng-1024a))) - (is (= [ [1 0.0 [1.0 1.0]] [0.0 0.0 [0.0 0.0]]] - (bootstrap (take 20 (repeatedly (constantly 1))) - (juxt criterium.stats/mean criterium.stats/variance) - 100 - criterium.well/well-rng-1024a)))) - -(deftest bootstrap-bca-test - (let [ci 0.95] - (is (= [1 [1 1]] - (bootstrap-bca (take 20 (repeatedly (constantly 1))) - criterium.stats/mean - 100 - [0.5 ci (- 1.0 ci)] - criterium.well/well-rng-1024a))) - (is (= [ [1 [1 1]] [0.0 [0.0 0.0]]] - (bootstrap-bca (take 20 (repeatedly (constantly 1))) - (juxt criterium.stats/mean criterium.stats/variance) - 100 - [0.5 ci (- 1.0 ci)] - criterium.well/well-rng-1024a))))) diff --git a/test/criterium/stats_test.clj b/test/criterium/stats_test.clj deleted file mode 100644 index 2a2e762..0000000 --- a/test/criterium/stats_test.clj +++ /dev/null @@ -1,94 +0,0 @@ -(ns criterium.stats-test - (:use clojure.test - criterium.stats) - (:require criterium.well)) - -(defmacro test-max-error [expected actual max-error] - `(is (< (Math/abs (- ~expected ~actual)) ~max-error))) - -(deftest mean-test - (is (= 1 (mean (take 20 (repeatedly (constantly 1)))))) - (is (= 3 (mean (range 0 7))))) - -(deftest sum-test - (is (= 20 (sum (take 20 (repeatedly (constantly 1)))))) - (is (= 21 (sum (range 0 7))))) - -(deftest sum-of-squares-test - (is (= 20.0 (sum-of-squares (take 20 (repeatedly (constantly 1)))))) - (is (= 80.0 (sum-of-squares (take 20 (repeatedly (constantly 2)))))) - (is (= 91.0 (sum-of-squares (range 0 7))))) - -(deftest variance-test - (is (= 0.0 (variance (take 20 (repeatedly (constantly 1)))))) - (is (= 4.0 (variance (range 0 7) 0)))) - -(deftest median-test - (is (= [5 [1 2] [7 8]] - (median [1 2 5 7 8]))) - (is (= [7/2 [1 2 2] [5 7 8]] - (median [1 2 2 5 7 8])))) - -(deftest quartiles-test - (is (= [3/2 5 15/2] - (quartiles [1 2 5 7 8]))) - 
(is (= [2 7/2 7] - (quartiles [1 2 2 5 7 8])))) - -(deftest boxplot-outlier-thresholds-test - (is (= [-4.0 -1.0 7.0 10.0] (boxplot-outlier-thresholds 2.0 4.0)))) - -(deftest bootstrap-estimate-test - (is (= [1 0.0 [1.0 1.0]] - (bootstrap-estimate (take 20 (repeatedly (constantly 1)))))) - (let [[m s [l u]] (bootstrap-estimate (take 1000000 (repeatedly rand)))] - (is (test-max-error 0.5 m 1e-2)) - (is (test-max-error 0 l 0.2)) - (is (test-max-error 1 u 0.2)) - (is (test-max-error 0.0833 s 0.2)))) - -(deftest bootstrap-estimate-scale-test - (is (= [1e-9 [1e-8 1e-8]] - (scale-bootstrap-estimate [1 1 [10 10]] 1e-9)))) - -;; Values from R, qnorm (with options(digits=15)) -(deftest normal-quantile-test - (is (pos? (normal-quantile 0.5001))) - (is (neg? (normal-quantile 0.4999))) - (is (< 2e-8 (- (normal-quantile 0.999) (normal-quantile 0.001)))) - (let [max-error 1.0e-7] - (is (= 0.0 (normal-quantile 0.5))) - (is (test-max-error 1.2815515655446 (normal-quantile 0.9) max-error)) - (is (test-max-error 0.674489750196082 (normal-quantile 0.75) max-error)) - (is (test-max-error -1.03643338949379 (normal-quantile 0.15) max-error)) - (is (test-max-error -2.32634787404084 (normal-quantile 0.01) max-error)))) - - -;; Values from R, erf <- function(x) 2 * pnorm(x * sqrt(2)) - 1 -(deftest erf-test - (let [max-error 1.5e-7] - (test-max-error 0.999999984582742 (erf 4) max-error) - (test-max-error 0.995322265018953 (erf 2) max-error) - (test-max-error 0.842700792949715 (erf 1) max-error) - (test-max-error 0.112462916018285 (erf 0.1) max-error) - (test-max-error 0.0112834155558497 (erf 0.01) max-error))) - -;; Values from R, pnorm -(deftest normal-cdf-test - (let [max-error 1.5e-7] - (test-max-error 0.99865010196837 (normal-cdf 3.0) max-error) - (test-max-error 0.977249868051821 (normal-cdf 2.0) max-error) - (test-max-error 0.841344746068543 (normal-cdf 1.0) max-error) - (test-max-error 0.691462461274013 (normal-cdf 0.5) max-error) - (test-max-error 0.5 (normal-cdf 0.0) max-error) - (test-max-error 0.158655253931457 (normal-cdf -1.0) max-error) - (test-max-error 0.00134989803163009 (normal-cdf -3.0) max-error))) - -(deftest quantiles-test - (let [max-error 1.5e-7] - (test-max-error 1.0 (quantile 0.5 [0 1 2]) max-error) - (test-max-error 1.5 (quantile 0.5 [0 1 2 3]) max-error) - (test-max-error 1.0 (quantile 0.25 [0 1 1.5 2 3]) max-error) - (test-max-error 2.0 (quantile 0.75 [0 1 1.5 2 3]) max-error)) - (is (= 5.0 (quantile 0.05 (range 0 101)))) - (is (= 95.0 (quantile 0.95 (range 0 101))))) diff --git a/tests.edn b/tests.edn new file mode 100644 index 0000000..38757ae --- /dev/null +++ b/tests.edn @@ -0,0 +1,29 @@ +#kaocha/v1 +{:tests [{:kaocha.testable/type :kaocha.type/clojure.test + :id :all + :ns-patterns ["-test$"] + :source-paths ["bases/criterium/src" + "bases/agent/src"] + :kaocha/source-paths ["bases/criterium/src" + "bases/agent/src"] + :test-paths ["bases/criterium/test" + "bases/agent/test"] + :skip-meta [:very-slow]}] + + :plugins [:kaocha.plugin/randomize + :kaocha.plugin/filter + :kaocha.plugin/capture-output + :kaocha.plugin/profiling + #_:kaocha.plugin/cloverage] + :cloverage/opts {:ns-regex ["^criterium.*$"]} + :capture-output? #profile {:default true + :dots true + :debug false} + :reporter #profile {:dots kaocha.report/dots + :default kaocha.report/documentation + :progress kaocha.report.progress/report + :debug kaocha.report/debug} + :kaocha.plugin.randomize/randomize? true + :kaocha.plugin.profiling/count 10 + :kaocha.plugin.profiling/profiling? 
#profile {:default false + :profile true}} diff --git a/workspace.edn b/workspace.edn new file mode 100644 index 0000000..016409e --- /dev/null +++ b/workspace.edn @@ -0,0 +1,11 @@ +{:top-namespace "criterium" + :interface-ns "interface" + :default-profile-name "default" + :compact-views #{} + :vcs {:name "git" + :auto-add false} + :tag-patterns {:stable "stable-*" + :release "v[0-9]*"} + :projects {"development" {:alias "dev"} + "criterium" {:alias "core"}} + :test {:create-test-runner polylith-kaocha.test-runner/create}}