From 3aed503efd5067777761ce976d539fffb28a5d88 Mon Sep 17 00:00:00 2001
From: JordanBoltonMN
Date: Wed, 8 Mar 2023 15:31:45 -0600
Subject: [PATCH] Update benchmark script to drop outliers (#361)

* MVP

* modify nodeDump to not use traversal

* Resource -> TestResource

* mvp, though about to rework

* removed debug conditional

* updating nodeDump, but currently errors out

* added console error

* additional sanity check

* fixed improper API usage, made things clearer

* removing debug value

* merged master conflict

* fixing a few issues

* comment changes
---
 src/test/scripts/createBenchmark.ts | 110 ++++++++++++++++++++---------
 1 file changed, 77 insertions(+), 33 deletions(-)

diff --git a/src/test/scripts/createBenchmark.ts b/src/test/scripts/createBenchmark.ts
index 147a51cb..145c1238 100644
--- a/src/test/scripts/createBenchmark.ts
+++ b/src/test/scripts/createBenchmark.ts
@@ -11,33 +11,85 @@ import { BenchmarkTraceManager, NoOpTraceManagerInstance } from "../../powerquer
 import { TestConstants, TestFileUtils, TestResourceUtils } from "../testUtils";
 import { TestResource } from "../testUtils/resourceUtils";
 
-const IterationsPerFile: number = 1;
 const BenchmarkDirectory: string = path.join(__dirname, "benchmark");
+
+// We want to run each file ${IterationsPerFile} times to get a more accurate average duration.
+const IterationsPerFile: number = 100;
+// Additionally, we drop the top and bottom ${IterationPercentageDropped * 100}% of
+// durations from iterations to reduce the impact of outliers.
+const IterationPercentageDropped: number = 0.05;
+const NumIterationsDropped: number = Math.floor(IterationsPerFile * IterationPercentageDropped);
+
+// Writes a bunch of trace entries to disk.
+// Usually not useful or needed, especially since it adds a bunch of IO overhead.
 const WriteTracesToDisk: boolean = false;
 
 interface ParserSummary {
-    readonly durationAverage: number;
-    readonly durations: ReadonlyArray<number>;
-    readonly durationSummed: number;
+    readonly durations: Durations;
+    readonly durationsFiltered: Durations;
     readonly failedToParseResourcePaths: ReadonlyArray<string> | null;
     readonly parserName: string;
 }
 
 interface ResourceSummary {
-    readonly durationAverage: number;
-    readonly durations: ReadonlyArray<number>;
-    readonly durationSummed: number;
+    readonly durations: Durations;
+    readonly durationsFiltered: Durations;
     readonly failedToParse: boolean;
-    readonly parserName: string;
     readonly filePath: string;
+    readonly parserName: string;
+}
+
+interface Durations {
+    readonly average: number;
+    readonly durations: ReadonlyArray<number>;
+    readonly summed: number;
 }
 
 function jsonStringify(value: unknown): string {
     return JSON.stringify(value, undefined, 4);
 }
 
-function zFill(value: number): string {
-    return value.toString().padStart(Math.ceil(Math.log10(IterationsPerFile + 1)), "0");
+function zFill(currentValue: number, upperBound: number): string {
+    return currentValue.toString().padStart(Math.ceil(Math.log10(upperBound + 1)), "0");
+}
+
+// Collapses one parser's per-resource average durations (raw or outlier-filtered,
+// depending on filterOutOutliers) into a single Durations summary.
+function createParserSummaryDurations(
+    resourceSummaries: ReadonlyArray<ResourceSummary>,
+    filterOutOutliers: boolean,
+): Durations {
+    const durations: ReadonlyArray<number> = [...resourceSummaries].map((resourceSummary: ResourceSummary) =>
+        filterOutOutliers ? resourceSummary.durationsFiltered.average : resourceSummary.durations.average,
+    );
+
+    const summed: number = durations.reduce((acc: number, curr: number) => acc + curr, 0);
+
+    return {
+        durations,
+        summed,
+        average: summed / resourceSummaries.length,
+    };
+}
+
+// Summarizes the durations measured across the iterations of a single (resource, parser) pair.
+// When filterOutOutliers is true the NumIterationsDropped smallest and largest durations are discarded first.
+function createResourceSummaryDurations(durations: ReadonlyArray<number>, filterOutOutliers: boolean): Durations {
+    if (filterOutOutliers) {
+        // Array.prototype.sort compares elements as strings by default, so sort numerically.
+        durations = [...durations]
+            .sort((left: number, right: number) => left - right)
+            .slice(NumIterationsDropped, durations.length - NumIterationsDropped);
+    }
+
+    const summed: number = durations.reduce((acc: number, curr: number) => acc + curr, 0);
+    const average: number = summed / durations.length;
+
+    return {
+        durations,
+        summed,
+        average,
+    };
 }
 
 // Triple for-loop with parsers, resource filepaths, and an iteration count being the parameters.
@@ -45,10 +97,8 @@ function zFill(value: number): string {
 // It's to find the average duration of a parse for a given (file, parser) pair.
 // Durations are initially measured in fractional milliseconds, then the fractional component is dropped.
 // The outer loop summarizes the aggregate durations for each parser across all files.
-// Optionally writes traces to disk with $WriteTracesToDIsk.
+// Optionally writes traces to disk with $WriteTracesToDisk.
 async function main(): Promise<void> {
-    // Even though we want to sum up the durations by parser it's better to order
-    // the triple-for-loop this way due to file IO.
     const resourceSummariesByParserName: Map<string, ReadonlyArray<ResourceSummary>> = new Map();
     const resources: ReadonlyArray<TestResource> = TestResourceUtils.getResources();
     const numResources: number = resources.length;
@@ -56,14 +106,19 @@ async function main(): Promise<void> {
     for (let resourceIndex: number = 0; resourceIndex < numResources; resourceIndex += 1) {
         const { fileContents, filePath, resourceName }: TestResource = ArrayUtils.assertGet(resources, resourceIndex);
 
-        console.log(`Starting resource ${zFill(resourceIndex + 1)} out of ${numResources}: ${filePath}`);
+        console.log(`Starting resource ${zFill(resourceIndex + 1, numResources)} out of ${numResources}: ${filePath}`);
 
         for (const [parserName, parser] of TestConstants.ParserByParserName.entries()) {
             let failedToParse: boolean = false;
             const durations: number[] = [];
 
             for (let iteration: number = 0; iteration < IterationsPerFile; iteration += 1) {
-                console.log(`\tIteration ${zFill(iteration + 1)} out of ${IterationsPerFile} using ${parserName}`);
+                console.log(
+                    `\tIteration ${zFill(
+                        iteration + 1,
+                        IterationsPerFile,
+                    )} out of ${IterationsPerFile} using ${parserName}`,
+                );
 
                 let contents: string = "";
 
@@ -96,20 +151,16 @@ async function main(): Promise<void> {
                             "traces",
                             parserName,
                             resourceName,
-                            `iteration_${zFill(iteration)}.log`,
+                            `iteration_${zFill(iteration, IterationsPerFile)}.log`,
                         ),
                         contents,
                     );
                 }
             }
 
-            const durationSummed: number = Math.floor(durations.reduce((a: number, b: number) => a + b, 0));
-            const durationAverage: number = Math.floor(durationSummed / durations.length);
-
             const resourceSummary: ResourceSummary = {
-                durationAverage,
-                durations,
-                durationSummed,
+                durations: createResourceSummaryDurations(durations, false),
+                durationsFiltered: createResourceSummaryDurations(durations, true),
                 failedToParse,
                 parserName,
                 filePath,
@@ -127,21 +178,14 @@ async function main(): Promise<void> {
     }
 
     for (const [parserName, resourceSummaries] of resourceSummariesByParserName.entries()) {
-        const durations: ReadonlyArray<number> = resourceSummaries.map(
-            (resourceSummary: ResourceSummary) => resourceSummary.durationAverage,
-        );
-
         const failedToParseResourcePaths: ReadonlyArray<string> = resourceSummaries
             .filter((resourceSummary: ResourceSummary) => resourceSummary.failedToParse)
-            .map((resourceSummary: ResourceSummary) => resourceSummary.filePath);
-
-        const durationSummed: number = Math.floor(durations.reduce((a: number, b: number) => a + b, 0));
-        const durationAverage: number = Math.floor(durationSummed / resourceSummariesByParserName.size);
+            .map((resourceSummary: ResourceSummary) => resourceSummary.filePath)
+            .sort();
 
         const parserSummary: ParserSummary = {
-            durationAverage,
-            durations,
-            durationSummed,
+            durations: createParserSummaryDurations(resourceSummaries, false),
+            durationsFiltered: createParserSummaryDurations(resourceSummaries, true),
             failedToParseResourcePaths: failedToParseResourcePaths ? failedToParseResourcePaths : null,
             parserName,
         };