Skip to content

Commit

Permalink
Update benchmark script to drop outliers (#361)
Browse files Browse the repository at this point in the history
* MVP

* modify nodeDump to not use traversal

* Resource -> TestResource

* mvp, though about to rework

* removed debug conditional

* updating nodeDump, but currently errors out

* added console error

* additional sanity check

* fixed improper API usage, made things clearer

* removing debug value

* merged master conflict

* fixing a few issues

* comment changes
  • Loading branch information
JordanBoltonMN authored Mar 8, 2023
1 parent aacb599 commit 3aed503
Showing 1 changed file with 70 additions and 33 deletions.
103 changes: 70 additions & 33 deletions src/test/scripts/createBenchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,59 +11,107 @@ import { BenchmarkTraceManager, NoOpTraceManagerInstance } from "../../powerquer
import { TestConstants, TestFileUtils, TestResourceUtils } from "../testUtils";
import { TestResource } from "../testUtils/resourceUtils";

// Absolute path of the `benchmark` directory that sits alongside this script.
const BenchmarkDirectory: string = path.join(__dirname, "benchmark");

// Number of times each file is run, so that the reported average duration is less noisy.
const IterationsPerFile: number = 100;
// Fraction of iterations (0.05 means 5%) trimmed from BOTH the fastest and the slowest end
// of the measured durations, which reduces the impact of outliers.
const IterationPercentageDropped: number = 0.05;
// Count of iterations trimmed from each end, rounded down to a whole number.
const NumIterationsDropped: number = Math.floor(IterationPercentageDropped * IterationsPerFile);

// When enabled a bunch of trace entries are written to disk.
// Rarely useful or needed, and it adds a lot of IO overhead, so it is off by default.
const WriteTracesToDisk: boolean = false;

// Aggregate benchmark result for a single parser across every resource.
interface ParserSummary {
    // Statistics built from each resource's unfiltered average duration.
    readonly durations: Durations;
    // Statistics built from each resource's outlier-filtered average duration.
    readonly durationsFiltered: Durations;
    // File paths of the resources flagged as failed to parse, or null.
    readonly failedToParseResourcePaths: ReadonlyArray<string> | null;
    // Name of the parser these results belong to.
    readonly parserName: string;
}

// Benchmark result for a single (resource file, parser) pair.
interface ResourceSummary {
    // Statistics over every measured iteration.
    readonly durations: Durations;
    // Statistics over the iterations that remain after outliers are dropped.
    readonly durationsFiltered: Durations;
    // Whether parsing this resource was flagged as a failure.
    readonly failedToParse: boolean;
    // Path of the resource file that was parsed.
    readonly filePath: string;
    // Name of the parser that was used.
    readonly parserName: string;
}

// A collection of duration samples together with their sum and arithmetic mean.
interface Durations {
    // The individual samples the statistics were computed from.
    readonly durations: ReadonlyArray<number>;
    // Sum of all samples.
    readonly summed: number;
    // `summed` divided by the number of samples.
    readonly average: number;
}

// Serializes `value` as pretty-printed JSON, indenting nested levels by four spaces.
function jsonStringify(value: unknown): string {
    const indentWidth: number = 4;

    return JSON.stringify(value, null, indentWidth);
}

// Left-pads `currentValue` with zeros so that it is as wide as the decimal
// representation of `upperBound`, which keeps counters aligned in log output
// and makes generated file names sort in numeric order.
//
// `upperBound` is the largest value the counter can take. It defaults to
// `IterationsPerFile` so that calls which only pass a value keep working.
function zFill(currentValue: number, upperBound: number = IterationsPerFile): string {
    // Number of decimal digits needed to print `upperBound`.
    const width: number = Math.ceil(Math.log10(upperBound + 1));

    return currentValue.toString().padStart(width, "0");
}

// Builds the per-parser statistics from the per-resource summaries.
// Each resource contributes a single sample: its average duration, taken from
// either the outlier-filtered or the unfiltered statistics depending on `filterOutOutliers`.
// The resulting average is the sum of those samples divided by the number of resources.
function createParserSummaryDurations(
    resourceSummaries: ReadonlyArray<ResourceSummary>,
    filterOutOutliers: boolean,
): Durations {
    const averages: number[] = [];
    let total: number = 0;

    for (const summary of resourceSummaries) {
        const source: Durations = filterOutOutliers ? summary.durationsFiltered : summary.durations;

        averages.push(source.average);
        total += source.average;
    }

    return {
        durations: averages,
        summed: total,
        average: total / resourceSummaries.length,
    };
}

// Builds the statistics for the iterations of a single (resource, parser) pair.
//
// When `filterOutOutliers` is true the samples are sorted in ascending numeric order
// and `numIterationsDropped` samples are removed from each end before summing.
// Otherwise the samples are used as given. The input array is never mutated.
//
// `numIterationsDropped` defaults to the module-level `NumIterationsDropped`.
//
// Note: if no samples remain (empty input, or trimming removes everything)
// then `average` is NaN because it is computed as 0 / 0.
function createResourceSummaryDurations(
    durations: ReadonlyArray<number>,
    filterOutOutliers: boolean,
    numIterationsDropped: number = NumIterationsDropped,
): Durations {
    if (filterOutOutliers) {
        // An explicit comparator is required: the default sort compares elements as strings,
        // which would order 100 before 9 and trim the wrong samples.
        durations = [...durations]
            .sort((left: number, right: number) => left - right)
            .slice(numIterationsDropped, durations.length - numIterationsDropped);
    }

    const summed: number = durations.reduce((acc: number, curr: number) => acc + curr, 0);
    const average: number = summed / durations.length;

    return {
        durations,
        summed,
        average,
    };
}

// Triple for-loop with parsers, resource filepaths, and an iteration count being the parameters.
// The inner most loop is run ${IterationsPerFile} times and calls `TaskUtils.tryLexParse`.
// It's to find the average duration of a parse for a given (file, parser) pair.
// Durations are initially measured in fractional milliseconds, then the fractional component is dropped.
// The outer loop summarizes the aggregate durations for each parser across all files.
// Optionally writes traces to disk with $WriteTracesToDisk.
async function main(): Promise<void> {
// Even though we want to sum up the durations by parser it's better to order
// the triple-for-loop this way due to file IO.
const resourceSummariesByParserName: Map<string, ReadonlyArray<ResourceSummary>> = new Map();
const resources: ReadonlyArray<TestResource> = TestResourceUtils.getResources();
const numResources: number = resources.length;

for (let resourceIndex: number = 0; resourceIndex < numResources; resourceIndex += 1) {
const { fileContents, filePath, resourceName }: TestResource = ArrayUtils.assertGet(resources, resourceIndex);

console.log(`Starting resource ${zFill(resourceIndex + 1)} out of ${numResources}: ${filePath}`);
console.log(`Starting resource ${zFill(resourceIndex + 1, numResources)} out of ${numResources}: ${filePath}`);

for (const [parserName, parser] of TestConstants.ParserByParserName.entries()) {
let failedToParse: boolean = false;
const durations: number[] = [];

for (let iteration: number = 0; iteration < IterationsPerFile; iteration += 1) {
console.log(`\tIteration ${zFill(iteration + 1)} out of ${IterationsPerFile} using ${parserName}`);
console.log(
`\tIteration ${zFill(
iteration + 1,
IterationsPerFile,
)} out of ${IterationsPerFile} using ${parserName}`,
);

let contents: string = "";

Expand Down Expand Up @@ -96,20 +144,16 @@ async function main(): Promise<void> {
"traces",
parserName,
resourceName,
`iteration_${zFill(iteration)}.log`,
`iteration_${zFill(iteration, IterationsPerFile)}.log`,
),
contents,
);
}
}

const durationSummed: number = Math.floor(durations.reduce((a: number, b: number) => a + b, 0));
const durationAverage: number = Math.floor(durationSummed / durations.length);

const resourceSummary: ResourceSummary = {
durationAverage,
durations,
durationSummed,
durations: createResourceSummaryDurations(durations, false),
durationsFiltered: createResourceSummaryDurations(durations, true),
failedToParse,
parserName,
filePath,
Expand All @@ -127,21 +171,14 @@ async function main(): Promise<void> {
}

for (const [parserName, resourceSummaries] of resourceSummariesByParserName.entries()) {
const durations: ReadonlyArray<number> = resourceSummaries.map(
(resourceSummary: ResourceSummary) => resourceSummary.durationAverage,
);

const failedToParseResourcePaths: ReadonlyArray<string> = resourceSummaries
.filter((resourceSummary: ResourceSummary) => resourceSummary.failedToParse)
.map((resourceSummary: ResourceSummary) => resourceSummary.filePath);

const durationSummed: number = Math.floor(durations.reduce((a: number, b: number) => a + b, 0));
const durationAverage: number = Math.floor(durationSummed / resourceSummariesByParserName.size);
.map((resourceSummary: ResourceSummary) => resourceSummary.filePath)
.sort();

const parserSummary: ParserSummary = {
durationAverage,
durations,
durationSummed,
durations: createParserSummaryDurations(resourceSummaries, false),
durationsFiltered: createParserSummaryDurations(resourceSummaries, true),
failedToParseResourcePaths: failedToParseResourcePaths ? failedToParseResourcePaths : null,
parserName,
};
Expand Down

0 comments on commit 3aed503

Please sign in to comment.