Skip to content
This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Commit

Permalink
Trace tool (#1723)
Browse files Browse the repository at this point in the history
Summary:
### Motivation

Continued work on the diagnostics tool; this change adds a model trace tool.

### Changes proposed

- Changes include new JavaScript and Python files for the Bokeh Application.
- Updates to the JavaScript helper modules in `stats`.

Pull Request resolved: #1723

Test Plan:
The tool was run in the Coin flipping tutorial.

### Types of changes

- [ ] Docs change / refactoring / dependency upgrade
- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)

### Checklist

- [x] My code follows the code style of this project.
- [ ] My change requires a change to the documentation.
- [ ] I have updated the documentation accordingly.
- [x] I have read the **[CONTRIBUTING](https://github.com/facebookresearch/beanmachine/blob/main/CONTRIBUTING.md)** document.
- [ ] I have added tests to cover my changes.
- [ ] All new and existing tests passed.
- [x] The title of my pull request is a short description of the requested changes.

Reviewed By: feynmanliang

Differential Revision: D39978113

Pulled By: horizon-blue

fbshipit-source-id: a319ed8d20ffc45ca678203050768612294694a4
  • Loading branch information
ndmlny-qs authored and facebook-github-bot committed Oct 24, 2022
1 parent 709a925 commit 339700e
Show file tree
Hide file tree
Showing 23 changed files with 1,775 additions and 207 deletions.
8 changes: 6 additions & 2 deletions src/beanmachine/ppl/diagnostics/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@
# flake8: noqa

"""Visual diagnostic tools for Bean Machine models."""

import sys
from pathlib import Path


if sys.version_info >= (3, 8):
# NOTE: NotRequired (PEP 655, see https://peps.python.org/pep-0655/) is only
# available from typing in Python 3.11+, so we import it from typing_extensions.
# It mirrors interface objects in JavaScript that allow keys to not be required
# using ?.
from typing import TypedDict

from typing_extensions import NotRequired
else:
from typing_extensions import TypedDict
from typing_extensions import NotRequired, TypedDict


TOOLS_DIR = Path(__file__).parent.resolve()
Expand Down
25 changes: 0 additions & 25 deletions src/beanmachine/ppl/diagnostics/tools/js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
"fast-kde": "^0.2.1"
},
"devDependencies": {
"@types/node": "^18.0.4",
"@typescript-eslint/eslint-plugin": "^5.30.5",
"@typescript-eslint/parser": "^5.30.5",
"eslint": "^8.19.0",
"eslint-config-airbnb": "^19.0.4",
Expand All @@ -24,32 +22,9 @@
"eslint-plugin-prefer-arrow": "^1.2.3",
"eslint-plugin-react": "^7.28.0",
"eslint-plugin-react-hooks": "^4.3.0",
"prettier": "^2.7.1",
"ts-loader": "^9.3.1",
"ts-node": "^10.9.1",
"typescript": "^4.7.4",
"webpack": "^5.74.0",
"webpack-cli": "^4.10.0"
},
"overrides": {
"cwise": "$cwise",
"minimist": "$minimist",
"quote-stream": "$quote-stream",
"static-eval": "$static-eval",
"static-module": "$static-module",
"typedarray-pool": "$typedarray-pool"
},
"peerDependencies": {
"@types/cwise": "^1.0.4",
"@types/minimist": "^1.2.2",
"@types/static-eval": "^0.2.31",
"@types/typedarray-pool": "^1.1.2",
"buffer": "^6.0.3",
"cwise": "^1.0.10",
"minimist": "^1.2.6",
"quote-stream": "^1.0.2",
"static-eval": "2.1.0",
"static-module": "^3.0.4",
"typedarray-pool": "^1.2.0"
}
}
89 changes: 89 additions & 0 deletions src/beanmachine/ppl/diagnostics/tools/js/src/stats/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,92 @@ export const numericalSort = (data: number[]): number[] => {
return a < b ? -1 : a > b ? 1 : 0;
});
};

/**
 * Determine the shape of the given (possibly nested) array.
 *
 * The shape is read by following the first element of every nesting level, so the
 * data is assumed to be rectangular. The walk stops at the first element that is not
 * array-like, or at an empty array (which contributes no dimension).
 *
 * @param {any[]} data - Any array of data.
 * @returns {number[]} The shape of the data as an array, e.g. `[2, 3]` for two rows
 *     of three values. An empty array yields `[]`.
 */
export const shape = (data: any[]): number[] => {
  // Only objects that carry a numeric length (arrays, typed arrays) count as a
  // dimension. Testing for a truthy `length` alone would recurse forever on strings
  // ("a"[0] === "a") and throw on null/undefined elements.
  const hasLength = (value: unknown): value is ArrayLike<unknown> => {
    return (
      typeof value === 'object' &&
      value !== null &&
      typeof (value as {length?: unknown}).length === 'number'
    );
  };

  const dimensions: number[] = [];
  let level: unknown = data;
  while (hasLength(level) && level.length > 0) {
    dimensions.push(level.length);
    level = level[0];
  }
  return dimensions;
};

/**
 * Create a linearly spaced array between the given start and stop values.
 *
 * Either a step size or a number of intervals (`size`) controls the spacing. When
 * `size` is given it takes precedence over `step`, and the step becomes
 * `(stop - start) / size`.
 *
 * @param {number} start - Where to start the array from.
 * @param {number} stop - Where to stop the array.
 * @param {number} [step] - The step size to take. Ignored when `size` is given.
 * @param {boolean} [closed] - If true the array includes the stop value (when it
 *     falls on the grid); otherwise the last grid point is left out.
 * @param {null | number} [size] - If not null, the number of equal intervals between
 *     start and stop. A closed range then has `size + 1` values, an open one `size`.
 *     A size that is not greater than zero yields an empty array.
 * @returns {number[]} An array that is linearly spaced between the start and stop
 *     values.
 */
export const linearRange = (
  start: number,
  stop: number,
  step: number = 1,
  closed: boolean = true,
  size: null | number = null,
): number[] => {
  const span = stop - start;
  let increment = step;
  let intervals: number;
  if (size !== null) {
    if (!(size > 0)) {
      return [];
    }
    // The number of intervals is known exactly; re-deriving it from span / step
    // could lose a point to floating point error.
    increment = span / size;
    intervals = size;
  } else {
    const ratio = span / step;
    const nearest = Math.round(ratio);
    // Snap ratios such as 0.3 / 0.1 = 2.9999999999999996 to the integer they are
    // meant to be, so the final point is not silently dropped.
    const tolerance = 1e-9 * Math.max(1, Math.abs(ratio));
    intervals = Math.abs(ratio - nearest) <= tolerance ? nearest : ratio;
  }

  // A NaN count produces an empty array, an infinite one makes Array.from throw a
  // RangeError; both match what happens for a zero step.
  const count = Math.max(0, Math.floor(closed ? intervals + 1 : intervals));
  // With a whole number of intervals the closed range must end exactly on stop.
  const endsOnStop = closed && size !== null && Number.isInteger(size);
  return Array.from({length: count}, (_, i) => {
    if (endsOnStop && i === count - 1) {
      return stop;
    }
    return start + i * increment;
  });
};

/**
 * Compute the permutation of indices that puts the data in ascending order, in the
 * spirit of NumPy's argsort. The input array is left untouched.
 *
 * @param {number[]} data - The data to sort.
 * @returns {number[]} An array of indices that would sort the original array.
 */
export const argSort = (data: number[]): number[] => {
  // Start from the identity permutation and order it by the values it points at.
  const order = data.map((_, position) => {
    return position;
  });
  order.sort((left, right) => {
    return data[left] - data[right];
  });
  return order;
};

/**
 * Tally how many times each value occurs in an array.
 *
 * @param {number[]} data - The numeric array to count values for.
 * @returns {{[key: string]: number}} An object keyed by the (stringified) values of
 *     the original array, where each entry is the number of occurrences of that value.
 */
export const valueCounts = (data: number[]): {[key: string]: number} => {
  const tally: {[key: string]: number} = {};
  for (const value of data) {
    const key = String(value);
    const seenSoFar = tally[key] ?? 0;
    tally[key] = seenSoFar + 1;
  }
  return tally;
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/

import {argSort, valueCounts} from './array';

/**
* Scale the given array of numbers by the given scaleFactor. Note that this method
* divides values in the given array by the scaleFactor.
Expand Down Expand Up @@ -32,3 +34,49 @@ export const scaleToOne = (data: number[]): number[] => {
const scaleFactor = Math.max(...data);
return scaleBy(data, scaleFactor);
};

/**
 * Assign ranks to the given data. Follows SciPy's and ArviZ's implementations: ranks
 * start at 1 and values that occur more than once all receive the mean of the ranks
 * they would otherwise occupy.
 *
 * @param {number[]} data - The numeric data to rank.
 * @returns {number[]} An array of rankings, aligned with the input data.
 */
export const rankData = (data: number[]): number[] => {
  // Ordinal ranks: the value at sortedIndex[i] is the (i + 1)-th smallest.
  const rank = new Array<number>(data.length);
  const sortedIndex = argSort(data);
  for (let i = 0; i < sortedIndex.length; i += 1) {
    rank[sortedIndex[i]] = i + 1;
  }

  // Find the values that appear more than once. NaN never compares equal to itself,
  // so it is never treated as a tie.
  const tiedValues = new Set<number>();
  for (const [key, count] of Object.entries(valueCounts(data))) {
    const value = parseFloat(key);
    if (count > 1 && !Number.isNaN(value)) {
      tiedValues.add(value);
    }
  }
  if (tiedValues.size === 0) {
    return rank;
  }

  // Group the positions of every tied value in a single pass over the data, rather
  // than rescanning the whole array once per tied value.
  const tiedPositions = new Map<number, number[]>();
  for (let i = 0; i < data.length; i += 1) {
    const value = data[i];
    if (tiedValues.has(value)) {
      const positions = tiedPositions.get(value);
      if (positions === undefined) {
        tiedPositions.set(value, [i]);
      } else {
        positions.push(i);
      }
    }
  }

  // Replace the ordinal ranks in each tie group with their mean.
  for (const positions of tiedPositions.values()) {
    let sum = 0.0;
    for (let k = 0; k < positions.length; k += 1) {
      sum += rank[positions[k]];
    }
    const rankMean = sum / positions.length;
    for (let k = 0; k < positions.length; k += 1) {
      rank[positions[k]] = rankMean;
    }
  }
  return rank;
};
152 changes: 152 additions & 0 deletions src/beanmachine/ppl/diagnostics/tools/js/src/stats/histogram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import {linearRange, numericalSort, shape} from './array';
import {rankData, scaleToOne} from './dataTransformation';
import {mean as computeMean} from './pointStatistic';

/**
 * Compute the histogram of the given data. The bins are equally wide and span the
 * minimum to the maximum of the data; the maximum is counted in the last bin.
 *
 * @param {number[]} data - Data to bin.
 * @param {number} [numBins] - The number of bins to use for the histogram. If none is
 *     given, then we follow ArviZ's implementation by using twice the number of bins
 *     of the Sturges formula.
 * @returns {number[][]} One `[binIndex, count]` pair per bin, ordered by bin index.
 *     Empty data yields bins with zero counts (no bins when `numBins` is not given).
 */
export const calculateHistogram = (data: number[], numBins: number = 0): number[][] => {
  const sortedData = numericalSort(data);
  const numSamples = sortedData.length;

  let binCount = numBins;
  if (binCount === 0) {
    // log2(0) is -Infinity, so the formula is only usable when there is data.
    binCount =
      numSamples === 0 ? 0 : Math.floor(Math.ceil(2 * Math.log2(numSamples)) + 1);
  }
  // Array(binCount) keeps rejecting an invalid (negative or fractional) bin count.
  const bins = Array.from(Array(binCount), (_, index) => {
    return [index, 0];
  });
  if (numSamples === 0) {
    return bins;
  }

  // The data is already sorted in ascending order, so the extremes are its end
  // points. This avoids spreading the whole array into Math.min/Math.max, which
  // exceeds the engine's argument limit for very large sample arrays.
  const dataMin = sortedData[0];
  const dataMax = sortedData[numSamples - 1];
  const binWidth = (dataMax - dataMin) / binCount;
  // Constant data has a zero width range; fall back to 1 so everything lands in bin 0.
  const binSize = binWidth === 0 ? 1 : binWidth;

  for (let i = 0; i < numSamples; i += 1) {
    const datum = sortedData[i];
    // The maximum maps onto index binCount; it belongs to the last bin.
    const binIndex = Math.min(Math.floor((datum - dataMin) / binSize), binCount - 1);
    bins[binIndex][1] += 1;
  }
  return bins;
};

/**
 * Output of `rankHistogram`: one entry per chain, keyed `chain1`, `chain2`, ...
 * (1-based). All arrays are filled by `rankHistogram`.
 */
export interface RankHistogram {
[key: string]: {
// Per-bin data for the histogram rectangles of this chain.
quad: {
// Left bin edges (every bin edge except the last one).
left: number[];
// Scaled bin counts shifted up by the 0-based chain index.
top: number[];
// Right bin edges (every bin edge except the first one).
right: number[];
// The 0-based chain index, repeated once per bin.
bottom: number[];
// The 1-based chain index, repeated once per bin.
chain: number[];
// Bin labels of the form "<left>-<right>", using locale formatted edges.
draws: string[];
// Bin counts scaled by `scaleToOne`, without the chain offset.
rank: number[];
};
// x runs from 0 to the total number of samples; y repeats the mean of `quad.top`.
line: {x: number[]; y: number[]};
// The 1-based chain index, repeated once per value of `line.x`.
chain: number[];
// The 1-based chain index minus the mean of `quad.top`, repeated per `line.x` value.
rankMean: number[];
// The mean of `quad.rank`, repeated once per value of `line.x`.
mean: number[];
};
}

/**
 * A histogram of rank data. The draws of all chains are ranked together, the ranks
 * are split back into chains, and a normalized histogram of the ranks is computed
 * for each chain.
 *
 * @param {number[][]} data - Raw random variable data for several chains, one array
 *     of draws per chain. Every chain is expected to have the same number of draws.
 * @returns {RankHistogram} A histogram of the data rankings, keyed `chain1`,
 *     `chain2`, ... for each chain.
 */
export const rankHistogram = (data: number[][]): RankHistogram => {
  const [numChains, numDraws] = shape(data);
  const numSamples = numChains * numDraws;
  const flatData = data.flat();

  // Rank all draws together, then cut the ranks back into one array per chain so
  // they have the same shape as the original data.
  const rank = rankData(flatData);
  const rankArray: number[][] = [];
  for (let i = 0; i < numChains; i += 1) {
    rankArray.push(rank.slice(i * numDraws, (i + 1) * numDraws));
  }

  // Calculate the number of bins needed. We will follow ArviZ and use twice the result
  // using the Sturges' formula.
  const numBins = Math.floor(Math.ceil(2 * Math.log2(numSamples)) + 1);
  // Fold instead of Math.max(...rank): spreading one argument per sample exceeds the
  // engine's argument limit for very large traces.
  const lastBinEdge = rank.reduce((largest, value) => {
    return Math.max(largest, value);
  }, Number.NEGATIVE_INFINITY);

  // Calculate the bin edges. Since the linearRange function computes a linear spacing
  // of values between the start and end point, we need to ensure they are integer
  // values.
  const binEdges = linearRange(0, lastBinEdge, 1, true, numBins).map((value) => {
    return Math.ceil(value);
  });
  const left = binEdges.slice(0, binEdges.length - 1);
  const right = binEdges.slice(1);
  const binLabel: string[] = [];
  for (let j = 0; j < left.length; j += 1) {
    binLabel.push(`${left[j].toLocaleString()}-${right[j].toLocaleString()}`);
  }
  const x = linearRange(0, numSamples, 1);

  // Calculate the histograms of the rank data, and normalize it for each chain.
  const output: RankHistogram = {};
  for (let i = 0; i < numChains; i += 1) {
    const chainIndex = i + 1;
    const chainName = `chain${chainIndex}`;
    // NOTE(review): calculateHistogram bins over the range of the ranks it is given
    // (this chain's own minimum to maximum), while the quad edges above span all
    // ranks. Confirm this is intended for chains that do not cover the full range.
    const chainRankHistogram = calculateHistogram(rankArray[i], numBins);
    const counts = scaleToOne(
      chainRankHistogram.map((bin) => {
        return bin[1];
      }),
    );
    // Offset the normalized counts by the chain index so the chains stack vertically.
    const chainCounts = counts.map((value) => {
      return value + i;
    });
    const chainRankMean = computeMean(chainCounts);

    output[chainName] = {
      quad: {
        left: left,
        top: chainCounts,
        right: right,
        bottom: Array(numBins).fill(i),
        chain: Array(left.length).fill(chainIndex),
        draws: binLabel,
        rank: counts,
      },
      line: {x: x, y: Array(x.length).fill(chainRankMean)},
      chain: Array(x.length).fill(chainIndex),
      rankMean: Array(x.length).fill(chainIndex - chainRankMean),
      mean: Array(x.length).fill(computeMean(counts)),
    };
  }
  return output;
};
Loading

0 comments on commit 339700e

Please sign in to comment.