-
-
Notifications
You must be signed in to change notification settings - Fork 84
PPC Calibration plots #352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
0e1e446
d784405
5cf62f9
01ac826
f9806eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
# x' PPC calibration | ||
#' | ||
#' Assess the calibration of the predictive distributions `yrep` in relation to | ||
#' the data `y'. | ||
#' See the **Plot Descriptions** section, below, for details. | ||
#' | ||
#' @name PPC-calibration | ||
#' @family PPCs | ||
#' | ||
#' @template args-y-yrep | ||
#' @template args-group | ||
#' | ||
#' @template return-ggplot-or-data | ||
#' | ||
#' @section Plot Descriptions: | ||
#' \describe{ | ||
#' \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{ | ||
#' | ||
#' }, | ||
#' \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{ | ||
#' | ||
#' }, | ||
#' \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{ | ||
#' | ||
#' } | ||
#' } | ||
#' | ||
#' @examples | ||
#' color_scheme_set("brightblue") | ||
#' | ||
#' # Make an example dataset of binary observations | ||
#' ymin <- range(example_y_data(), example_yrep_draws())[1] | ||
#' ymax <- range(example_y_data(), example_yrep_draws())[2] | ||
#' y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin)) | ||
#' prep <- (example_yrep_draws() - ymin) / (ymax - ymin) | ||
#' | ||
#' ppc_calibration_overlay(y, prep[1:50, ]) | ||
NULL | ||
|
||
|
||
#' @rdname PPC-calibration | ||
#' @export | ||
ppc_calibration_overlay <- function( | ||
y, prep, ..., linewidth = 0.25, alpha = 0.5) { | ||
check_ignored_arguments(...) | ||
data <- .ppc_calibration_data(y, prep) | ||
ggplot(data) + | ||
geom_abline(color = "black", linetype = 2) + | ||
geom_line( | ||
aes(value, cep, group = rep_id, color = "yrep"), | ||
linewidth = linewidth, alpha = alpha | ||
) + | ||
scale_color_ppc() + | ||
bayesplot_theme_get() + | ||
legend_none() + | ||
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + | ||
xlab("Predicted probability") + | ||
ylab("Conditional event probability") + | ||
NULL | ||
} | ||
|
||
#' @rdname PPC-calibration | ||
#' @export | ||
ppc_calibration_overlay_grouped <- function( | ||
y, prep, group, ..., linewidth = 0.25, alpha = 0.7) { | ||
check_ignored_arguments(...) | ||
data <- .ppc_calibration_data(y, prep, group) | ||
ggplot(data) + | ||
geom_abline(color = "black", linetype = 2) + | ||
geom_line(aes(value, cep, group = rep_id, color = "yrep"), | ||
linewidth = linewidth, alpha = alpha | ||
) + | ||
facet_wrap(vars(group)) + | ||
scale_color_ppc() + | ||
bayesplot_theme_get() + | ||
legend_none() + | ||
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + | ||
xlab("Predicted probability") + | ||
ylab("Conditional event probability") + | ||
NULL | ||
} | ||
|
||
#' @rdname PPC-calibration | ||
#' @export | ||
ppc_calibration <- function( | ||
y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) { | ||
check_ignored_arguments(...) | ||
data <- .ppc_calibration_data(y, prep) %>% | ||
group_by(y_id) %>% | ||
summarise( | ||
value = median(value), | ||
lb = quantile(cep, .5 - .5 * prob), | ||
ub = quantile(cep, .5 + .5 * prob), | ||
cep = median(cep) | ||
) | ||
|
||
ggplot(data) + | ||
aes(value, cep) + | ||
geom_abline(color = "black", linetype = 2) + | ||
geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + | ||
geom_line( | ||
aes(color = "y"), | ||
linewidth = linewidth | ||
) + | ||
scale_color_ppc() + | ||
scale_fill_ppc() + | ||
bayesplot_theme_get() + | ||
legend_none() + | ||
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + | ||
xlab("Predicted probability") + | ||
ylab("Conditional event probability") + | ||
NULL | ||
} | ||
|
||
#' @rdname PPC-calibration | ||
#' @export | ||
ppc_calibration_grouped <- function( | ||
y, prep, group, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) { | ||
check_ignored_arguments(...) | ||
data <- .ppc_calibration_data(y, prep, group) %>% | ||
group_by(group, y_id) %>% | ||
summarise( | ||
value = median(value), | ||
lb = quantile(cep, .5 - .5 * prob), | ||
ub = quantile(cep, .5 + .5 * prob), | ||
cep = median(cep) | ||
) | ||
|
||
ggplot(data) + | ||
aes(value, cep) + | ||
geom_abline(color = "black", linetype = 2) + | ||
geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + | ||
geom_line( | ||
aes(color = "y"), | ||
linewidth = linewidth | ||
) + | ||
facet_wrap(vars(group)) + | ||
scale_color_ppc() + | ||
scale_fill_ppc() + | ||
bayesplot_theme_get() + | ||
legend_none() + | ||
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + | ||
xlab("Predicted probability") + | ||
ylab("Conditional event probability") + | ||
NULL | ||
} | ||
|
||
#' @rdname PPC-calibration | ||
#' @export | ||
ppc_loo_calibration <- function( | ||
y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) { | ||
check_ignored_arguments(...) | ||
data <- .ppc_calibration_data(y, prep) | ||
ggplot(data) + | ||
geom_abline(color = "black", linetype = 2) + | ||
geom_line( | ||
aes(value, cep, group = rep_id, color = "yrep"), | ||
linewidth = linewidth, alpha = alpha | ||
) + | ||
scale_color_ppc() + | ||
bayesplot_theme_get() + | ||
legend_none() + | ||
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + | ||
xlab("Predicted probability") + | ||
ylab("Conditional event probability") + | ||
NULL | ||
} | ||
|
||
#' @rdname PPC-calibration | ||
#' @export | ||
ppc_loo_calibration_grouped <- function( | ||
y, prep, group, lw, ..., linewidth = 0.25, alpha = 0.7) { | ||
check_ignored_arguments(...) | ||
data <- .ppc_calibration_data(y, prep, group) | ||
ggplot(data) + | ||
geom_abline(color = "black", linetype = 2) + | ||
geom_line(aes(value, cep, group = rep_id, color = "yrep"), | ||
linewidth = linewidth, alpha = alpha | ||
) + | ||
facet_wrap(vars(group)) + | ||
scale_color_ppc() + | ||
bayesplot_theme_get() + | ||
legend_none() + | ||
coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + | ||
xlab("Predicted probability") + | ||
ylab("Conditional event probability") + | ||
NULL | ||
} | ||
|
||
.ppc_calibration_data <- function(y, prep, group = NULL) { | ||
y <- validate_y(y) | ||
n_obs <- length(y) | ||
prep <- validate_predictions(prep, n_obs) | ||
if (any(prep > 1 | prep < 0)) { | ||
stop("Values of ´prep´ should be predictive probabilities between 0 and 1.") | ||
} | ||
if (!is.null(group)) { | ||
group <- validate_group(group, n_obs) | ||
} else { | ||
group <- rep(1, n_obs * nrow(prep)) | ||
} | ||
|
||
if (requireNamespace("monotone", quietly = TRUE)) { | ||
monotone <- monotone::monotone | ||
} else { | ||
monotone <- function(y) { | ||
stats::isoreg(y)$yf | ||
} | ||
} | ||
Comment on lines
+203
to
+209
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there an advantage to using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is, does it do something slightly better? Or the same thing more efficiently? I've seen |
||
.ppd_data(prep, group = group) %>% | ||
group_by(group, rep_id) %>% | ||
mutate( | ||
ord = order(value), | ||
value = value[ord], | ||
cep = monotone(y[ord]) | ||
) |> | ||
ungroup() | ||
} | ||
|
||
.loo_resample_data <- function(prep, lw, psis_object) { | ||
lw <- .get_lw(lw, psis_object) | ||
stopifnot(identical(dim(prep), dim(lw))) | ||
# Work in progress here... | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So for these functions
prep
is a matrix of probabilities and not actually a matrix of draws of binary outcomes from the posterior predictive distribution, right? I think in that case the argument nameprep
makes sense. But the description at the top of the file sayswhich makes it sound like the user should give us
yrep
. So I think we just need to reconcile how we describe this to the user.