Skip to content
This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Commit

Permalink
Trace diagnostic tool
Browse files Browse the repository at this point in the history
This tool branches from the marginal1d tool and rebases against main.
Added files include those needed for displaying a Trace diagnostic tool
for Bean Machine models.

This also updates the Coin_flipping.ipynb tutorial to include the new
diagnostic tool.
  • Loading branch information
ndmlny-qs committed Oct 24, 2022
1 parent 27b8c69 commit 1d32e05
Show file tree
Hide file tree
Showing 16 changed files with 1,738 additions and 94 deletions.
6 changes: 5 additions & 1 deletion src/beanmachine/ppl/diagnostics/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@


if sys.version_info >= (3, 8):
# NOTE: We need to import NotRequired from typing_extensions until PEP 655 is
# accepted, see https://peps.python.org/pep-0655/. This is to follow the
# interface objects in JavaScript that allow keys to not be required using ?.
from typing import TypedDict
from typing_extensions import NotRequired
else:
from typing_extensions import TypedDict
from typing_extensions import NotRequired, TypedDict


TOOLS_DIR = Path(__file__).parent.resolve()
Expand Down
89 changes: 89 additions & 0 deletions src/beanmachine/ppl/diagnostics/tools/js/src/stats/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,92 @@ export const numericalSort = (data: number[]): number[] => {
return a < b ? -1 : a > b ? 1 : 0;
});
};

/**
 * Determine the shape of the given (possibly nested) array.
 *
 * The size of every dimension is read from the first element found at each nesting
 * level, so the data is assumed to be rectangular. Descent stops as soon as a value
 * is not a non-empty array-like object; an empty array contributes no dimension, so
 * `shape([])` is `[]`.
 *
 * @param {any[]} data - Any array of data.
 * @returns {number[]} The shape of the data as an array.
 */
export const shape = (data: any[]): number[] => {
  // Only objects that expose a positive numeric `length` are treated as a nested
  // level. Looking at `length` alone is not enough: a non-empty string would recurse
  // forever (a one-character string is its own first element), and `null` or
  // `undefined` elements would throw when `length` is read.
  const isNonEmptyArrayLike = (value: unknown): value is ArrayLike<unknown> => {
    return (
      typeof value === 'object' &&
      value !== null &&
      typeof (value as {length?: unknown}).length === 'number' &&
      (value as {length: number}).length > 0
    );
  };

  const dataShape: number[] = [];
  let level: unknown = data;
  while (isNonEmptyArrayLike(level)) {
    dataShape.push(level.length);
    level = level[0];
  }
  return dataShape;
};

/**
 * Create an array of linearly spaced values beginning at `start`.
 *
 * @param {number} start - Where to start the array from.
 * @param {number} stop - Where to stop the array.
 * @param {number} [step] - The step size to take. Ignored when `size` is given.
 * @param {boolean} [closed] - When true the value at `stop` is included, otherwise
 *     the final step is left out.
 * @param {null | number} [size] - If not null, the span between `start` and `stop`
 *     is divided into this many equal steps. A closed range then holds `size + 1`
 *     values and an open range holds `size` values.
 * @returns {number[]} An array that is linearly spaced between the start and stop
 *     values.
 */
export const linearRange = (
  start: number,
  stop: number,
  step: number = 1,
  closed: boolean = true,
  size: null | number = null,
): number[] => {
  const stride = size !== null ? (stop - start) / size : step;
  const rawLength = closed
    ? (stop - start) / stride + 1
    : (stop - start - stride) / stride + 1;
  // The division above is carried out in floating point, so a count that should be a
  // whole number can land a hair below it (0.3 / 0.1 is 2.9999999999999996). Since
  // Array.from truncates the length, that would silently drop the final value. Snap
  // to the nearest integer when we are within rounding error of it, and only floor
  // when the span truly is not a multiple of the stride.
  const nearest = Math.round(rawLength);
  const tolerance = 1e-9 * Math.max(1, Math.abs(rawLength));
  const length =
    Math.abs(rawLength - nearest) <= tolerance ? nearest : Math.floor(rawLength);
  return Array.from({length}, (_, i) => {
    return start + i * stride;
  });
};

/**
 * Compute the permutation of indices that puts the data in ascending order, in the
 * spirit of NumPy's `argsort`. The input array is left untouched, and equal values
 * keep their original relative order.
 *
 * @param {number[]} data - The data to sort.
 * @returns {number[]} An array of indices that would sort the original array.
 */
export const argSort = (data: number[]): number[] => {
  // Start from the identity permutation and order it by the values it points at.
  const order = Array.from(data, (_, position) => {
    return position;
  });
  order.sort((left, right) => {
    return data[left] - data[right];
  });
  return order;
};

/**
 * Tally how many times each value occurs in an array.
 *
 * @param {number[]} data - The numeric array to count objects for.
 * @returns {{[key: string]: number}} An object whose keys are the (stringified)
 *     values found in the array and whose values are the number of occurrences.
 */
export const valueCounts = (data: number[]): {[key: string]: number} => {
  const tally: {[key: string]: number} = {};
  for (const value of data) {
    // Object keys are strings, so every number is stored under its string form.
    const key = String(value);
    tally[key] = (tally[key] ?? 0) + 1;
  }
  return tally;
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/

import {argSort, valueCounts} from './array';

/**
* Scale the given array of numbers by the given scaleFactor. Note that this method
* divides values in the given array by the scaleFactor.
Expand Down Expand Up @@ -32,3 +34,49 @@ export const scaleToOne = (data: number[]): number[] => {
const scaleFactor = Math.max(...data);
return scaleBy(data, scaleFactor);
};

/**
 * Assign ranks to the given data. Follows SciPy's and ArviZ's implementations.
 *
 * Ranks start at 1 for the smallest value. Values that occur more than once all
 * receive the mean of the ranks they would otherwise occupy.
 *
 * @param {number[]} data - The numeric data to rank.
 * @returns {number[]} An array of rankings, aligned with the input data.
 */
export const rankData = (data: number[]): number[] => {
  const n = data.length;
  const rank: number[] = new Array<number>(n);

  // Ordinal ranks: the i-th smallest element receives rank i + 1.
  const sortedIndex = argSort(data);
  for (let i = 0; i < n; i += 1) {
    rank[sortedIndex[i]] = i + 1;
  }

  // Collect, in a single pass, the positions of every value that occurs more than
  // once. This replaces a full rescan of the data for each tied value.
  const counts = valueCounts(data);
  const tiedPositions = new Map<number, number[]>();
  for (let j = 0; j < n; j += 1) {
    const value = data[j];
    // NaN is never equal to itself, so it is never considered tied.
    if (Number.isNaN(value) || !(counts[value] > 1)) {
      continue;
    }
    const positions = tiedPositions.get(value);
    if (positions === undefined) {
      tiedPositions.set(value, [j]);
    } else {
      positions.push(j);
    }
  }

  // Replace the ordinal ranks of each tied group with the group's mean rank.
  tiedPositions.forEach((positions) => {
    let sum = 0.0;
    for (let k = 0; k < positions.length; k += 1) {
      sum += rank[positions[k]];
    }
    const rankMean = sum / positions.length;
    for (let k = 0; k < positions.length; k += 1) {
      rank[positions[k]] = rankMean;
    }
  });
  return rank;
};
146 changes: 146 additions & 0 deletions src/beanmachine/ppl/diagnostics/tools/js/src/stats/histogram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/* import {calculateHistogram} from 'compute-histogram'; */
import {linearRange, numericalSort, shape} from './array';
import {rankData, scaleToOne} from './dataTransformation';
import {mean as computeMean} from './pointStatistic';

/**
 * Compute the histogram of the given data using equal-width bins that span the
 * minimum to the maximum of the data.
 *
 * @param {number[]} data - Data to bin.
 * @param {number} [numBins] - The number of bins to use for the histogram. If none is
 *     given (or 0 is given), then we follow ArviZ's implementation by using twice the
 *     number of bins of the Sturges formula.
 * @returns {number[][]} One `[binIndex, count]` pair per bin, in bin order. An empty
 *     array is returned when there is no data and no explicit bin count.
 */
export const calculateHistogram = (data: number[], numBins: number = 0): number[][] => {
  const sortedData = numericalSort(data);
  const numSamples = sortedData.length;

  let binCount = numBins;
  if (binCount === 0) {
    // The Sturges-based bin count is undefined for zero samples (log2(0) is
    // -Infinity), so there is nothing to bin.
    if (numSamples === 0) {
      return [];
    }
    binCount = Math.floor(Math.ceil(2 * Math.log2(numSamples)) + 1);
  }

  // Find the extremes with a loop rather than Math.min(...data) / Math.max(...data):
  // spreading a large sample array into call arguments can exceed the engine's
  // argument limit and throw a RangeError.
  let dataMin = Infinity;
  let dataMax = -Infinity;
  for (const datum of data) {
    if (datum < dataMin) {
      dataMin = datum;
    }
    if (datum > dataMax) {
      dataMax = datum;
    }
  }

  // When every value is identical the natural bin width is 0; fall back to 1 so all
  // values land in the first bin instead of dividing by zero.
  const binWidth = (dataMax - dataMin) / binCount;
  const binSize = binWidth === 0 ? 1 : binWidth;

  const bins = Array.from({length: binCount}, (_, i) => {
    return [i, 0];
  });

  for (let i = 0; i < numSamples; i += 1) {
    const datum = sortedData[i];
    // The maximum value sits exactly on the upper edge of the last bin; keep it in
    // that bin rather than indexing one past the end.
    const binIndex = Math.min(
      Math.floor((datum - dataMin) / binSize),
      bins.length - 1,
    );
    bins[binIndex][1] += 1;
  }
  return bins;
};

/** Bar (quad) data for one chain; every array holds one entry per bar. */
interface RankHistogramQuad {
  left: number[];
  top: number[];
  right: number[];
  bottom: number[];
  chain: number[];
  draws: string[];
  rank: number[];
}

/** Coordinates of the line drawn for one chain. */
interface RankHistogramLine {
  x: number[];
  y: number[];
}

/** Everything stored for a single chain of the rank histogram. */
interface RankHistogramChain {
  quad: RankHistogramQuad;
  line: RankHistogramLine;
  chain: number[];
  rankMean: number[];
  mean: number[];
}

/** Rank histogram data keyed by chain name. */
export interface RankHistogram {
  [key: string]: RankHistogramChain;
}

/**
 * A histogram of rank data.
 *
 * All draws from all chains are ranked together, and the ranks belonging to each
 * chain are then counted into one set of bin edges shared by every chain. Each
 * chain's counts are scaled so the tallest bar has height one and are then offset by
 * the zero-based chain index so the chains stack vertically.
 *
 * @param {number[][]} data - Raw random variable data for several chains. The data
 *     is assumed to be rectangular (every chain has the same number of draws).
 * @returns {RankHistogram} A histogram of the data rankings, keyed by `chain1`,
 *     `chain2`, and so on. An empty object is returned when there are no draws.
 */
export const rankHistogram = (data: number[][]): RankHistogram => {
  const output = {} as RankHistogram;
  const [numChains, numDraws] = shape(data);
  // Nothing to rank without at least one chain containing at least one draw.
  if (!numChains || !numDraws) {
    return output;
  }
  const numSamples = numChains * numDraws;

  // Rank the pooled draws, then cut the ranks back into one row per chain so they
  // have the same shape as the original data.
  const rank = rankData(data.flat());
  const rankArray: number[][] = [];
  for (let i = 0; i < numChains; i += 1) {
    rankArray.push(rank.slice(i * numDraws, (i + 1) * numDraws));
  }

  // Calculate the number of bins needed. We will follow ArviZ and use twice the result
  // using the Sturges' formula.
  const numBins = Math.floor(Math.ceil(2 * Math.log2(numSamples)) + 1);

  // Largest rank, found with a loop: spreading every rank into Math.max(...) can
  // exceed the engine's argument limit for large sample counts.
  let lastBinEdge = -Infinity;
  for (let i = 0; i < rank.length; i += 1) {
    if (rank[i] > lastBinEdge) {
      lastBinEdge = rank[i];
    }
  }

  // Calculate the bin edges. Since the linearRange function computes a linear spacing
  // of values between the start and end point, we need to ensure they are integer
  // values.
  const binEdges = linearRange(0, lastBinEdge, 1, true, numBins).map((value) => {
    return Math.ceil(value);
  });
  const leftEdges = binEdges.slice(0, binEdges.length - 1);
  const rightEdges = binEdges.slice(1);
  const numQuads = leftEdges.length;

  // Calculate the histograms of the rank data, and normalize it for each chain.
  for (let i = 0; i < numChains; i += 1) {
    const chainIndex = i + 1;
    const chainName = `chain${chainIndex}`;

    // Count this chain's ranks into the shared bin edges. Bins are half-open
    // [left, right) except the last one, which also includes its right edge. Using
    // the shared edges (instead of bins spanning only this chain's own minimum and
    // maximum rank) keeps the bar heights consistent with the left/right edges and
    // labels reported below.
    const rawCounts: number[] = Array(numQuads).fill(0);
    if (numQuads > 0) {
      for (const chainRank of rankArray[i]) {
        let bin = 0;
        while (bin < numQuads - 1 && chainRank >= rightEdges[bin]) {
          bin += 1;
        }
        rawCounts[bin] += 1;
      }
    }
    const counts = scaleToOne(rawCounts);
    // Offset the normalized counts so that each chain is drawn above the previous.
    const chainCounts = counts.map((value) => {
      return value + i;
    });

    const chainRankMean = computeMean(chainCounts);
    // Fresh copies per chain so that no two chains share the same array instances.
    const left = leftEdges.slice();
    const right = rightEdges.slice();
    const binLabel = [];
    for (let j = 0; j < left.length; j += 1) {
      binLabel.push(`${left[j].toLocaleString()}-${right[j].toLocaleString()}`);
    }
    const x = linearRange(0, numSamples, 1);
    const y = Array(x.length).fill(chainRankMean);
    output[chainName] = {
      quad: {
        left: left,
        top: chainCounts,
        right: right,
        bottom: Array(numQuads).fill(i),
        chain: Array(numQuads).fill(chainIndex),
        draws: binLabel,
        rank: counts,
      },
      line: {x: x, y: y},
      chain: Array(x.length).fill(chainIndex),
      rankMean: Array(x.length).fill(chainIndex - chainRankMean),
      mean: Array(x.length).fill(computeMean(counts)),
    };
  }
  return output;
};
Loading

0 comments on commit 1d32e05

Please sign in to comment.