Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrapper function for density plots #126

Open
wants to merge 15 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Imports:
magrittr,
mice,
purrr,
reshape2,
rlang,
stats,
stringr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(bwplot)
export(densityplot)
export(ggmice)
export(plot_corr)
export(plot_density)
export(plot_flux)
export(plot_pattern)
export(plot_pred)
Expand Down
156 changes: 156 additions & 0 deletions R/plot_density.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' Plot the density of observed and imputed values
#'
#' @param data An object of class [mice::mids].
#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all".
#' @param panels Logical, vector of length 1 specifying whether the density plots should be broken into panels (TRUE) or not (FALSE)
#'
#' @return An object of class [ggplot2::ggplot].
#'
#' @examples
#' imp <- mice::mice(mice::nhanes, print = FALSE)
#' plot_density(data = imp)
#' @export
plot_density <- function(data, vrb = "all", panels = FALSE) {
verify_data(data, imp = TRUE)
if (is.null(data$chainMean) && is.null(data$chainVar)) {
cli::cli_abort("No convergence diagnostics found", call. = FALSE)
}

# List all variables with missing values
vrb <- substitute(vrb)
varlist <- colnames(data$where[, colSums(data$where) != 0])

# Select variables
if (as.character(vrb)[1] == "all") {
vrb <- varlist
} else {
vrb <- names(dplyr::select(data$data, {{ vrb }}))
}
if (any(vrb %nin% varlist)) {
cli::cli_inform(
c(
"Density plot could not be produced for variable(s):",
" " = paste(vrb[which(vrb %nin% varlist)], collapse = ", "),
"x" = "No density plot available."
)
)
if (any(vrb %in% varlist)) {
vrb <- vrb[which(vrb %in% varlist)]
} else {
cli::cli_abort(c(
"x" = "None of the variables are imputed.",
"No plots can be produced."
))
}
}

# Reshape the mids object to be the best ggplot data possible
imps_ggplot <- reshape_mids(data)

# Keep only observed caases
imps_ggplot_obs <- imps_ggplot %>%
dplyr::filter(
.imp == 0,
.data$miss == FALSE
)

# Keep only imputed values
imps_ggplot_imps <- imps_ggplot %>%
dplyr::filter(
.imp != 0,
.data$miss == TRUE
)

# Stack observed and imputed
imps_ggplot <- rbind(imps_ggplot_obs, imps_ggplot_imps)

# Create a grouping variable for the densities
imps_ggplot$group <- paste0(imps_ggplot$.imp, imps_ggplot$miss)

# Make the grouping variable a factor with meaningful labels
imps_ggplot$group <- factor(
imps_ggplot$group,
levels = unique(imps_ggplot$group),
labels = c(
"Observed data",
paste0("Imputation chain ", seq(1:(length(unique(imps_ggplot$group)) - 1)))
)
)

# Create empty list
plot_list <- list()

# Make plot for 1 variable at the time
for (i in 1:length(vrb)) {
# Active data for plot
imps_ggplot_active <- imps_ggplot %>%
dplyr::filter(.data$variable %in% vrb[i])

# Base plot
plot_list[[i]] <- imps_ggplot_active %>%
ggplot2::ggplot(
ggplot2::aes(
x = value,
color = .data$group
)
) +
ggplot2::geom_density(
adjust = 1
)

# Panel structure
if (panels == TRUE) {
plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid(
cols = ggplot2::vars(.data$group),
scales = "free",
switch = "y"
)
} else {
plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid(
cols = ggplot2::vars(.data$variable),
scales = "free",
switch = "y"
)
}

# Cosmetics
plot_list[[i]] <- plot_list[[i]] +
ggplot2::scale_color_manual(
values = c(
"#006CC2B3",
rep("#B61A51B3", length(unique(imps_ggplot$.imp)) - 1)
)
) +
ggplot2::scale_x_continuous(
breaks = seq(
from = min(imps_ggplot_active$value),
to = max(imps_ggplot_active$value),
length = 5
),
limits = c(
min(imps_ggplot_active$value) - stats::sd(imps_ggplot_active$value),
max(imps_ggplot_active$value) + stats::sd(imps_ggplot_active$value)
)
) +
ggplot2::labs(x = vrb[i]) +
theme_mice() +
ggplot2::theme(
strip.text.x = ggplot2::element_blank(),
strip.background = ggplot2::element_blank(),
strip.placement = "outside",
strip.switch.pad.wrap = ggplot2::unit(0, "cm"),
legend.position = "none"
)
}

# Collect plot together with patchwork
if(length(plot_list) > 1){
if (panels == TRUE) {
patchwork::wrap_plots(plot_list, nrow = min(c(length(vrb), 5)))
} else {
patchwork::wrap_plots(plot_list, ncol = min(c(length(vrb), 5)))
}
} else {
plot_list[[1]]
}
}
70 changes: 70 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,73 @@ verify_data <- function(data,
}
}
}

#' Utils function to reshape mids object into a super-long format for density plots
#'
#' @param data A mids object.
#' @details
#'
#' ggplot works best with long format data. This function takes a mids object and returns a super-long format version of the mids object that is easy to process with ggplot
#' This super-long format has 5 columns:
#' - .imp: reference imputation data set (same as in mids object)
#' - .id: observation id (same as in mids object)
#' - miss: response vector (logical vector indicating whether the case was missing (1) or not (0))
#' - variable: vector of names of the variables in the original data set
#' - value: the data point (observed, na, or imputation value)
#'
#' This structure makes it easy to group, color, filter, and facet by the two most important factors for density plots: missingness and variables.
#'
#' @return data.frame in super-long format
#'
#' @examples
#' imp <- mice::mice(mice::nhanes, print = FALSE)
#' reshape_mids(data = imp)
#' @keywords internal
#' @noRd
reshape_mids <- function(data) {

# Create an empty list to store intermediate objects
shelf <- list()

# Extract imputations in long format
imps <- data.frame(mice::complete(data, "long", include = TRUE))

# Define column names for melting (depends on mice long format column names)
id_vars <- colnames(imps)[1:2]

# Define the response matrix
Rmat <- data$where

# Loop over the variables in the original data
for (j in 1:ncol(Rmat)) {

# Check if there are missing values
if (any(Rmat[, j])) {

# What variable are we considering
J <- colnames(Rmat)[j]

# Keep only the .imp identifier and the variable value
active_data <- imps[, c(id_vars, J)]

# Force active variable to numeric
active_data[, J] <- as.numeric(active_data[, J])

# attach the response indicator
active_data <- cbind(
active_data,
miss = Rmat[, J]
)

# Melt values
ad_melt <- reshape2::melt(active_data, id.vars = c(id_vars, "miss"))

# Store the result
shelf[[j]] <- ad_melt
}
}

# Combine the results from the many variables
do.call(rbind, shelf)

}
25 changes: 25 additions & 0 deletions man/plot_density.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/test-plot_density.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# create test objects
dat <- mice::nhanes
imp <- mice::mice(dat, printFlag = FALSE)

# tests
test_that("plot_density creates ggplot object", {
expect_s3_class(plot_density(imp), "ggplot")
expect_s3_class(plot_density(imp, vrb = "bmi"), "ggplot")
expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot")
expect_s3_class(plot_density(imp, vrb = "all"), "ggplot")
expect_s3_class(plot_density(imp, vrb = bmi), "ggplot")
expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot")
expect_s3_class(plot_density(imp, vrb = c(bmi, hyp)), "ggplot")
expect_s3_class(plot_density(imp, vrb = c("bmi"), panels = TRUE), "ggplot")
expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp"), panels = TRUE), "ggplot")
expect_s3_class(plot_density(imp, vrb = "all", panels = TRUE), "ggplot")
})

test_that("plot_density returns error with incorrect argument(s)", {
expect_error(plot_density(dat))
expect_error(plot_density(imp, vrb = "test"))
expect_error(plot_density(imp, vrb = "age"))
expect_message(plot_density(imp, vrb = c("age", "bmi")))
})
14 changes: 14 additions & 0 deletions tests/testthat/test-reshape_mids.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# create test objects
dat <- mice::nhanes
m <- 3
imp <- mice::mice(dat, m = m, printFlag = FALSE)
p <- sum(colSums(imp$where) > 0)

# tests
test_that("returns correct output", {
expect_s3_class(reshape_mids(imp), "data.frame")
expect_equal(
dim(reshape_mids(imp)),
c(nrow(dat) * (m + 1) * p, 5)
)
})
24 changes: 24 additions & 0 deletions vignettes/old_friends.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,23 @@ ggmice(imp, aes(x = hgt, group = .imp, size = .where)) +
guide = "none"
) +
theme(legend.position = "none")

# Can make the same plot
mice::densityplot(imp, ~hgt)
plot_density(data = imp, vrb = "hgt", panels = FALSE)

# Can be broken down by panels
mice::densityplot(imp, ~ hgt | .imp)
plot_density(data = imp, vrb = "hgt", panels = TRUE)

# Can plot arbitrary variables
plot_density(data = imp, vrb = c("wgt", "bmi", "phb"), panels = FALSE)
plot_density(data = imp, vrb = c("wgt", "bmi", "phb"), panels = TRUE)

# Can plot all variables
mice::densityplot(imp)
plot_density(data = imp)

```

# `fluxplot`
Expand Down Expand Up @@ -217,6 +234,13 @@ p <- map(vrb, ~ {
})
wrap_plots(p, guides = "collect") &
theme(legend.position = "bottom")

# Can make the same plot
plot_density(data = imp)

# And if needed can break down by data set
plot_density(data = imp, panels = TRUE)

```

Display strip plots for all variables.
Expand Down