From 70391fa8a1394473109f88c5329524f8f921ed02 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:31:01 +0200 Subject: [PATCH 01/14] feat: new plot density function --- R/plot_density.R | 168 +++++++++++++++++++++++++++++ tests/testthat/test-plot_density.R | 27 +++++ 2 files changed, 195 insertions(+) create mode 100644 R/plot_density.R create mode 100644 tests/testthat/test-plot_density.R diff --git a/R/plot_density.R b/R/plot_density.R new file mode 100644 index 00000000..c7a127e0 --- /dev/null +++ b/R/plot_density.R @@ -0,0 +1,168 @@ +#' Plot the density of observed and imputed values +#' +#' @param data An object of class [mice::mids]. +#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". +#' @param panels String, vector specifying whether the density plots should be returned in a single panel or not +#' +#' @return An object of class [ggplot2::ggplot]. +#' +#' @examples +#' imp <- mice::mice(mice::nhanes, print = FALSE) +#' plot_density(data = imp) +#' @export +plot_density <- function(data, vrb = "all", panels = "multiple") { + verify_data(data, imp = TRUE) + if (is.null(data$chainMean) && is.null(data$chainVar)) { + cli::cli_abort("No convergence diagnostics found", call. = FALSE) + } + + # Create a matrix of response indicators + RI <- is.na(data$data) + + # Select variables + if (vrb == "all") { + varlist <- colnames(RI[, colSums(RI) != 0]) + } else { + varlist <- vrb + } + + # Extract imputations in long format + imps <- data.frame(mice::complete(data, "long", include = TRUE)) + + # Create an empty list to store intermediate objects + shelf <- list() + + # Loop over the variables + for (j in 1:ncol(RI)) { + if (any(RI[, j])) { + # What variable are we considering + J <- colnames(RI)[j] + + # Keep only the .imp identifier and the variable value + active_data <- imps[, c(".imp", J)] + + # Force active variable to numeric + active_data[, J] <- as.numeric(active_data[, J]) + + # attach the response indicator + active_data <- cbind( + active_data, + miss = RI[, J] + ) + + # Melt values + ad_melt <- reshape2::melt(active_data, id.vars = c(".imp", "miss")) + + # Filter by dropping all of the cases that are observed from the non 0 groups + ad_melt_imps <- ad_melt %>% + dplyr::filter( + .imp != 0, + miss == TRUE + ) + + # Filter by dropping all cases that are missing in the observed data + ad_melt_obs <- ad_melt %>% + dplyr::filter( + .imp == 0, + miss == FALSE + ) + + # Store the result + shelf[[j]] <- rbind(ad_melt_obs, ad_melt_imps) + } + } + + # Combine the results from the many variables + imps_ggplot <- do.call(rbind, shelf) + + # Create a grouping variable for the densities + imps_ggplot$group <- paste0(imps_ggplot$.imp, imps_ggplot$miss) + + # Make the grouping variable a factor with meaningful labels + imps_ggplot$group <- factor( + imps_ggplot$group, + levels = unique(imps_ggplot$group), + labels = c( + "Observed data", + paste0("Imputation chain ", seq(1:(length(unique(imps_ggplot$group)) - 1))) + ) + ) + + # Active data for plot + imps_ggplot_active <- imps_ggplot %>% + dplyr::filter(.data$variable %in% varlist) + + # Create empty list + plot_list <- list() + + # Make plot for 1 variable at the time + for (i in 1:length(varlist)) { + # Active data for plot + imps_ggplot_active <- imps_ggplot %>% + dplyr::filter(variable %in% varlist[i]) + + # Base plot + plot_list[[i]] <- imps_ggplot_active %>% + ggplot2::ggplot( + ggplot2::aes( + x = value, + linetype = group + ) + ) + + ggplot2::geom_density( + adjust = 1 + ) + + # Panel structure + if (panels == TRUE) { + plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid( + cols = ggplot2::vars(group), + rows = ggplot2::vars(variable), + scales = "free", + switch = "y" + ) + } else { + plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid( + cols = ggplot2::vars(variable), + scales = "free", + switch = "y" + ) + } + + # Cosmetics + plot_list[[i]] <- plot_list[[i]] + + ggplot2::scale_linetype_manual( + values = c( + 5, + rep(1, length(unique(imps_ggplot$.imp)) - 1) + ) + ) + + ggplot2::scale_y_continuous( + position = "right" + ) + + ggplot2::scale_x_continuous( + breaks = seq( + from = min(imps_ggplot_active$value), + to = max(imps_ggplot_active$value), + length = 5 + ), + limits = c( + min(imps_ggplot_active$value) - sd(imps_ggplot_active$value), + max(imps_ggplot_active$value) + sd(imps_ggplot_active$value) + ) + ) + + theme_mice() + + ggplot2::theme( + strip.background = ggplot2::element_blank(), + strip.placement = "outside", + strip.switch.pad.wrap = ggplot2::unit(0, "cm"), + legend.position = "none" + ) + } + # Collect plot together with patchwork + if (panels == TRUE) { + patchwork::wrap_plots(plot_list, nrow = min(c(length(varlist), 5))) + } else { + patchwork::wrap_plots(plot_list, ncol = min(c(length(varlist), 5))) + } +} \ No newline at end of file diff --git a/tests/testthat/test-plot_density.R b/tests/testthat/test-plot_density.R new file mode 100644 index 00000000..01e110b1 --- /dev/null +++ b/tests/testthat/test-plot_density.R @@ -0,0 +1,27 @@ +# create test objects +dat <- mice::nhanes +imp <- mice::mice(dat, printFlag = FALSE) +head(dat) +mice::densityplot(imp, ~ bmi | .imp) +plot_density(data = imp, vrb = "bmi", panels = TRUE) + +mice::densityplot(imp) +plot_density(data = imp, vrb = "all", panels = FALSE) + +# tests +test_that("plot_density creates ggplot object", { + expect_s3_class(plot_density(imp), "ggplot") + expect_s3_class(plot_density(imp, vrb = "bmi"), "ggplot") + expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot") + expect_s3_class(plot_density(imp, vrb = "all"), "ggplot") + expect_s3_class(plot_density(imp, vrb = bmi), "ggplot") + expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot") + expect_s3_class(plot_density(imp, vrb = c(bmi, hyp)), "ggplot") +}) + +test_that("plot_density returns error with incorrect argument(s)", { + expect_error(plot_density(dat)) + expect_error(plot_density(imp, vrb = "test")) + expect_error(plot_density(imp, vrb = "age")) + expect_message(plot_density(imp, vrb = c("age", "bmi"))) +}) From 9d4dc3ca1b4d633b6bb5f089216d50ea9c34a538 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:40:29 +0200 Subject: [PATCH 02/14] fix: color codes instead of line types --- R/plot_density.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/plot_density.R b/R/plot_density.R index c7a127e0..c641a7c9 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -106,7 +106,7 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { ggplot2::ggplot( ggplot2::aes( x = value, - linetype = group + color = group ) ) + ggplot2::geom_density( @@ -131,15 +131,12 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { # Cosmetics plot_list[[i]] <- plot_list[[i]] + - ggplot2::scale_linetype_manual( + ggplot2::scale_color_manual( values = c( - 5, - rep(1, length(unique(imps_ggplot$.imp)) - 1) + "#006CC2B3", + rep("#B61A51B3", length(unique(imps_ggplot$.imp)) - 1) ) ) + - ggplot2::scale_y_continuous( - position = "right" - ) + ggplot2::scale_x_continuous( breaks = seq( from = min(imps_ggplot_active$value), From b40c6dd1fbadfdba6c7215487c826a83c9d87951 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:45:01 +0200 Subject: [PATCH 03/14] improve: replace RI with where --- R/plot_density.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/plot_density.R b/R/plot_density.R index c641a7c9..141a7f10 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -16,12 +16,9 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { cli::cli_abort("No convergence diagnostics found", call. = FALSE) } - # Create a matrix of response indicators - RI <- is.na(data$data) - # Select variables if (vrb == "all") { - varlist <- colnames(RI[, colSums(RI) != 0]) + varlist <- colnames(data$where[, colSums(data$where) != 0]) } else { varlist <- vrb } @@ -33,10 +30,10 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { shelf <- list() # Loop over the variables - for (j in 1:ncol(RI)) { - if (any(RI[, j])) { + for (j in 1:ncol(data$where)) { + if (any(data$where[, j])) { # What variable are we considering - J <- colnames(RI)[j] + J <- colnames(data$where)[j] # Keep only the .imp identifier and the variable value active_data <- imps[, c(".imp", J)] @@ -47,7 +44,7 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { # attach the response indicator active_data <- cbind( active_data, - miss = RI[, J] + miss = data$where[, J] ) # Melt values From dc7ff37c0a8d40df4b748110cce137011f3a4ffa Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:45:22 +0200 Subject: [PATCH 04/14] fix: single plot with collect framework --- R/plot_density.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/plot_density.R b/R/plot_density.R index 141a7f10..227b32e3 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -154,9 +154,13 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { ) } # Collect plot together with patchwork - if (panels == TRUE) { - patchwork::wrap_plots(plot_list, nrow = min(c(length(varlist), 5))) + if(length(plot_list) > 1){ + if (panels == TRUE) { + patchwork::wrap_plots(plot_list, nrow = min(c(length(varlist), 5))) + } else { + patchwork::wrap_plots(plot_list, ncol = min(c(length(varlist), 5))) + } } else { - patchwork::wrap_plots(plot_list, ncol = min(c(length(varlist), 5))) + plot_list[[1]] } } \ No newline at end of file From 8ca33ff19e64dedcac1da4278cb793dc8a7a0dc6 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 24 Oct 2023 17:08:10 +0200 Subject: [PATCH 05/14] fix: labelling of plots --- R/plot_density.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/plot_density.R b/R/plot_density.R index 227b32e3..327d0a5c 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -85,10 +85,6 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { ) ) - # Active data for plot - imps_ggplot_active <- imps_ggplot %>% - dplyr::filter(.data$variable %in% varlist) - # Create empty list plot_list <- list() @@ -145,8 +141,10 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { max(imps_ggplot_active$value) + sd(imps_ggplot_active$value) ) ) + + ggplot2::labs(x = varlist[i]) + theme_mice() + ggplot2::theme( + strip.text.x = ggplot2::element_blank(), strip.background = ggplot2::element_blank(), strip.placement = "outside", strip.switch.pad.wrap = ggplot2::unit(0, "cm"), From 3ce0aba6b37f00606ee992aa5defa123ed53a4eb Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 24 Oct 2023 17:08:46 +0200 Subject: [PATCH 06/14] fix: get rid of useless lines in testing of plot_density --- tests/testthat/test-plot_density.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/testthat/test-plot_density.R b/tests/testthat/test-plot_density.R index 01e110b1..2e24ad0e 100644 --- a/tests/testthat/test-plot_density.R +++ b/tests/testthat/test-plot_density.R @@ -1,12 +1,6 @@ # create test objects dat <- mice::nhanes imp <- mice::mice(dat, printFlag = FALSE) -head(dat) -mice::densityplot(imp, ~ bmi | .imp) -plot_density(data = imp, vrb = "bmi", panels = TRUE) - -mice::densityplot(imp) -plot_density(data = imp, vrb = "all", panels = FALSE) # tests test_that("plot_density creates ggplot object", { From 65f05ce1bc6e184a6e4fa7341fdaaa3c2ee27671 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:01:18 +0100 Subject: [PATCH 07/14] fix: panels error with many variables --- R/plot_density.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_density.R b/R/plot_density.R index 327d0a5c..0378520c 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -110,7 +110,6 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { if (panels == TRUE) { plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid( cols = ggplot2::vars(group), - rows = ggplot2::vars(variable), scales = "free", switch = "y" ) @@ -151,6 +150,7 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { legend.position = "none" ) } + # Collect plot together with patchwork if(length(plot_list) > 1){ if (panels == TRUE) { From 10ee35551210b89256c1efbd0b98af04cafcf89f Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:14:35 +0100 Subject: [PATCH 08/14] fix: correct handling of vrb argument --- R/plot_density.R | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/R/plot_density.R b/R/plot_density.R index 0378520c..f3285028 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -15,12 +15,33 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { if (is.null(data$chainMean) && is.null(data$chainVar)) { cli::cli_abort("No convergence diagnostics found", call. = FALSE) } + + # List all variables with missing values + vrb <- substitute(vrb) + varlist <- colnames(data$where[, colSums(data$where) != 0]) # Select variables - if (vrb == "all") { - varlist <- colnames(data$where[, colSums(data$where) != 0]) + if (as.character(vrb)[1] == "all") { + vrb <- varlist } else { - varlist <- vrb + vrb <- names(dplyr::select(data$data, {{ vrb }})) + } + if (any(vrb %nin% varlist)) { + cli::cli_inform( + c( + "Density plot could not be produced for variable(s):", + " " = paste(vrb[which(vrb %nin% varlist)], collapse = ", "), + "x" = "No density plot available." + ) + ) + if (any(vrb %in% varlist)) { + vrb <- vrb[which(vrb %in% varlist)] + } else { + cli::cli_abort(c( + "x" = "None of the variables are imputed.", + "No plots can be produced." + )) + } } # Extract imputations in long format @@ -89,10 +110,10 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { plot_list <- list() # Make plot for 1 variable at the time - for (i in 1:length(varlist)) { + for (i in 1:length(vrb)) { # Active data for plot imps_ggplot_active <- imps_ggplot %>% - dplyr::filter(variable %in% varlist[i]) + dplyr::filter(variable %in% vrb[i]) # Base plot plot_list[[i]] <- imps_ggplot_active %>% @@ -140,7 +161,7 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { max(imps_ggplot_active$value) + sd(imps_ggplot_active$value) ) ) + - ggplot2::labs(x = varlist[i]) + + ggplot2::labs(x = vrb[i]) + theme_mice() + ggplot2::theme( strip.text.x = ggplot2::element_blank(), @@ -150,13 +171,13 @@ plot_density <- function(data, vrb = "all", panels = "multiple") { legend.position = "none" ) } - + # Collect plot together with patchwork if(length(plot_list) > 1){ if (panels == TRUE) { - patchwork::wrap_plots(plot_list, nrow = min(c(length(varlist), 5))) + patchwork::wrap_plots(plot_list, nrow = min(c(length(vrb), 5))) } else { - patchwork::wrap_plots(plot_list, ncol = min(c(length(varlist), 5))) + patchwork::wrap_plots(plot_list, ncol = min(c(length(vrb), 5))) } } else { plot_list[[1]] From 8607496543c9f52869c9e741b4eda9aa950692a1 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:16:27 +0100 Subject: [PATCH 09/14] fix: correct value for panels argument --- R/plot_density.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_density.R b/R/plot_density.R index f3285028..b328206d 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -2,7 +2,7 @@ #' #' @param data An object of class [mice::mids]. #' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". -#' @param panels String, vector specifying whether the density plots should be returned in a single panel or not +#' @param panels Logical, vector of length 1 specifying whether the density plots should be broken into panels (TRUE) or not (FALSE) #' #' @return An object of class [ggplot2::ggplot]. #' @@ -10,7 +10,7 @@ #' imp <- mice::mice(mice::nhanes, print = FALSE) #' plot_density(data = imp) #' @export -plot_density <- function(data, vrb = "all", panels = "multiple") { +plot_density <- function(data, vrb = "all", panels = FALSE) { verify_data(data, imp = TRUE) if (is.null(data$chainMean) && is.null(data$chainVar)) { cli::cli_abort("No convergence diagnostics found", call. = FALSE) From f2850093d33d9c0c5a5b619ebe34249f40a53841 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:17:41 +0100 Subject: [PATCH 10/14] tests: add test for the panel argument --- tests/testthat/test-plot_density.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-plot_density.R b/tests/testthat/test-plot_density.R index 2e24ad0e..cbad1809 100644 --- a/tests/testthat/test-plot_density.R +++ b/tests/testthat/test-plot_density.R @@ -11,6 +11,9 @@ test_that("plot_density creates ggplot object", { expect_s3_class(plot_density(imp, vrb = bmi), "ggplot") expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot") expect_s3_class(plot_density(imp, vrb = c(bmi, hyp)), "ggplot") + expect_s3_class(plot_density(imp, vrb = c("bmi"), panels = TRUE), "ggplot") + expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp"), panels = TRUE), "ggplot") + expect_s3_class(plot_density(imp, vrb = "all", panels = TRUE), "ggplot") }) test_that("plot_density returns error with incorrect argument(s)", { From 713943522e8527a8912041fb3d23f0f0f43927d0 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:23:03 +0100 Subject: [PATCH 11/14] fix: problems found by R CMD check --- DESCRIPTION | 1 + NAMESPACE | 1 + R/plot_density.R | 16 ++++++++-------- man/plot_density.Rd | 25 +++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 man/plot_density.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 1ce588d1..65798792 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Imports: magrittr, mice, purrr, + reshape2, rlang, stats, stringr, diff --git a/NAMESPACE b/NAMESPACE index 5a976a40..535637ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(bwplot) export(densityplot) export(ggmice) export(plot_corr) +export(plot_density) export(plot_flux) export(plot_pattern) export(plot_pred) diff --git a/R/plot_density.R b/R/plot_density.R index b328206d..41d3d4c9 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -75,14 +75,14 @@ plot_density <- function(data, vrb = "all", panels = FALSE) { ad_melt_imps <- ad_melt %>% dplyr::filter( .imp != 0, - miss == TRUE + .data$miss == TRUE ) # Filter by dropping all cases that are missing in the observed data ad_melt_obs <- ad_melt %>% dplyr::filter( .imp == 0, - miss == FALSE + .data$miss == FALSE ) # Store the result @@ -113,14 +113,14 @@ plot_density <- function(data, vrb = "all", panels = FALSE) { for (i in 1:length(vrb)) { # Active data for plot imps_ggplot_active <- imps_ggplot %>% - dplyr::filter(variable %in% vrb[i]) + dplyr::filter(.data$variable %in% vrb[i]) # Base plot plot_list[[i]] <- imps_ggplot_active %>% ggplot2::ggplot( ggplot2::aes( x = value, - color = group + color = .data$group ) ) + ggplot2::geom_density( @@ -130,13 +130,13 @@ plot_density <- function(data, vrb = "all", panels = FALSE) { # Panel structure if (panels == TRUE) { plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid( - cols = ggplot2::vars(group), + cols = ggplot2::vars(.data$group), scales = "free", switch = "y" ) } else { plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid( - cols = ggplot2::vars(variable), + cols = ggplot2::vars(.data$variable), scales = "free", switch = "y" ) @@ -157,8 +157,8 @@ plot_density <- function(data, vrb = "all", panels = FALSE) { length = 5 ), limits = c( - min(imps_ggplot_active$value) - sd(imps_ggplot_active$value), - max(imps_ggplot_active$value) + sd(imps_ggplot_active$value) + min(imps_ggplot_active$value) - stats::sd(imps_ggplot_active$value), + max(imps_ggplot_active$value) + stats::sd(imps_ggplot_active$value) ) ) + ggplot2::labs(x = vrb[i]) + diff --git a/man/plot_density.Rd b/man/plot_density.Rd new file mode 100644 index 00000000..d58e95d6 --- /dev/null +++ b/man/plot_density.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_density.R +\name{plot_density} +\alias{plot_density} +\title{Plot the density of observed and imputed values} +\usage{ +plot_density(data, vrb = "all", panels = FALSE) +} +\arguments{ +\item{data}{An object of class \link[mice:mids-class]{mice::mids}.} + +\item{vrb}{String, vector, or unquoted expression with variable name(s), default is "all".} + +\item{panels}{Logical, vector of length 1 specifying whether the density plots should be broken into panels (TRUE) or not (FALSE)} +} +\value{ +An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. +} +\description{ +Plot the density of observed and imputed values +} +\examples{ +imp <- mice::mice(mice::nhanes, print = FALSE) +plot_density(data = imp) +} From cdf4c4ba89b6e6776f3e6deaf1d765eab7f4b287 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 18:09:07 +0100 Subject: [PATCH 12/14] feat: make a reshaping function --- R/plot_density.R | 65 ++++++++------------------- R/utils.R | 70 ++++++++++++++++++++++++++++++ tests/testthat/test-reshape_mids.R | 14 ++++++ 3 files changed, 102 insertions(+), 47 deletions(-) create mode 100644 tests/testthat/test-reshape_mids.R diff --git a/R/plot_density.R b/R/plot_density.R index 41d3d4c9..a0411e8a 100644 --- a/R/plot_density.R +++ b/R/plot_density.R @@ -44,54 +44,25 @@ plot_density <- function(data, vrb = "all", panels = FALSE) { } } - # Extract imputations in long format - imps <- data.frame(mice::complete(data, "long", include = TRUE)) - - # Create an empty list to store intermediate objects - shelf <- list() - - # Loop over the variables - for (j in 1:ncol(data$where)) { - if (any(data$where[, j])) { - # What variable are we considering - J <- colnames(data$where)[j] - - # Keep only the .imp identifier and the variable value - active_data <- imps[, c(".imp", J)] - - # Force active variable to numeric - active_data[, J] <- as.numeric(active_data[, J]) - - # attach the response indicator - active_data <- cbind( - active_data, - miss = data$where[, J] - ) - - # Melt values - ad_melt <- reshape2::melt(active_data, id.vars = c(".imp", "miss")) - - # Filter by dropping all of the cases that are observed from the non 0 groups - ad_melt_imps <- ad_melt %>% - dplyr::filter( - .imp != 0, - .data$miss == TRUE - ) - - # Filter by dropping all cases that are missing in the observed data - ad_melt_obs <- ad_melt %>% - dplyr::filter( - .imp == 0, - .data$miss == FALSE - ) - - # Store the result - shelf[[j]] <- rbind(ad_melt_obs, ad_melt_imps) - } - } + # Reshape the mids object to be the best ggplot data possible + imps_ggplot <- reshape_mids(data) + + # Keep only observed caases + imps_ggplot_obs <- imps_ggplot %>% + dplyr::filter( + .imp == 0, + .data$miss == FALSE + ) + + # Keep only imputed values + imps_ggplot_imps <- imps_ggplot %>% + dplyr::filter( + .imp != 0, + .data$miss == TRUE + ) - # Combine the results from the many variables - imps_ggplot <- do.call(rbind, shelf) + # Stack observed and imputed + imps_ggplot <- rbind(imps_ggplot_obs, imps_ggplot_imps) # Create a grouping variable for the densities imps_ggplot$group <- paste0(imps_ggplot$.imp, imps_ggplot$miss) diff --git a/R/utils.R b/R/utils.R index 67fcf484..cb2114d5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -104,3 +104,73 @@ verify_data <- function(data, } } } + +#' Utils function to reshape mids object into a super-long format for density plots +#' +#' @param data A mids object. +#' @details +#' +#' ggplot works best with long format data. This function takes a mids object and returns a super-long format version of the mids object that is easy to process with ggplot +#' This super-long format has 5 columns: +#' - .imp: reference imputation data set (same as in mids object) +#' - .id: observation id (same as in mids object) +#' - miss: response vector (logical vector indicating whether the case was missing (1) or not (0)) +#' - variable: vector of names of the variables in the original data set +#' - value: the data point (observed, na, or imputation value) +#' +#' This structure makes it easy to group, color, filter, and facet by the two most important factors for density plots: missingness and variables. +#' +#' @return data.frame in super-long format +#' +#' @examples +#' imp <- mice::mice(mice::nhanes, print = FALSE) +#' reshape_mids(data = imp) +#' @keywords internal +#' @noRd +reshape_mids <- function(data) { + + # Create an empty list to store intermediate objects + shelf <- list() + + # Extract imputations in long format + imps <- data.frame(mice::complete(data, "long", include = TRUE)) + + # Define column names for melting (depends on mice long format column names) + id_vars <- colnames(imps)[1:2] + + # Define the response matrix + Rmat <- data$where + + # Loop over the variables in the original data + for (j in 1:ncol(Rmat)) { + + # Check if there are missing values + if (any(Rmat[, j])) { + + # What variable are we considering + J <- colnames(Rmat)[j] + + # Keep only the .imp identifier and the variable value + active_data <- imps[, c(id_vars, J)] + + # Force active variable to numeric + active_data[, J] <- as.numeric(active_data[, J]) + + # attach the response indicator + active_data <- cbind( + active_data, + miss = Rmat[, J] + ) + + # Melt values + ad_melt <- reshape2::melt(active_data, id.vars = c(id_vars, "miss")) + + # Store the result + shelf[[j]] <- ad_melt + } + } + + # Combine the results from the many variables + do.call(rbind, shelf) + +} \ No newline at end of file diff --git a/tests/testthat/test-reshape_mids.R b/tests/testthat/test-reshape_mids.R new file mode 100644 index 00000000..005fdd93 --- /dev/null +++ b/tests/testthat/test-reshape_mids.R @@ -0,0 +1,14 @@ +# create test objects +dat <- mice::nhanes +m <- 3 +imp <- mice::mice(dat, m = m, printFlag = FALSE) +p <- sum(colSums(imp$where) > 0) + +# tests +test_that("returns correct output", { + expect_s3_class(reshape_mids(imp), "data.frame") + expect_equal( + dim(reshape_mids(imp)), + c(nrow(dat) * (m + 1) * p, 5) + ) +}) From 5cc4854cb351d90024783e59d8a7e740b1bd6328 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 18:24:39 +0100 Subject: [PATCH 13/14] docs: add alternative plot_density examples --- vignettes/old_friends.Rmd | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/vignettes/old_friends.Rmd b/vignettes/old_friends.Rmd index b054dfa8..ee0662c4 100644 --- a/vignettes/old_friends.Rmd +++ b/vignettes/old_friends.Rmd @@ -74,6 +74,33 @@ ggmice(imp, aes(x = hgt, group = .imp, size = .where)) + guide = "none" ) + theme(legend.position = "none") + +# original plot +mice::densityplot(imp) +# ggmice equivalent +p <- map(vrb, ~ { + ggmice(imp, aes(x = .data[[.x]], group = .imp)) + + geom_density() +}) +wrap_plots(p, guides = "collect") & + theme(legend.position = "bottom") + +# Can make the same plot +mice::densityplot(imp, ~hgt) +plot_density(data = imp, vrb = "hgt", panels = FALSE) + +# Can be broken down by panels +mice::densityplot(imp, ~ hgt | .imp) +plot_density(data = imp, vrb = "hgt", panels = TRUE) + +# Can plot arbitrary variables +plot_density(data = imp, vrb = c("wgt", "bmi", "phb"), panels = FALSE) +plot_density(data = imp, vrb = c("wgt", "bmi", "phb"), panels = TRUE) + +# Can plot all variables +mice::densityplot(imp) +plot_density(data = imp) + ``` # `fluxplot` @@ -217,6 +244,13 @@ p <- map(vrb, ~ { }) wrap_plots(p, guides = "collect") & theme(legend.position = "bottom") + +# Can make the same plot +plot_density(data = imp) + +# And if needed can break down by data set +plot_density(data = imp, panels = TRUE) + ``` Display strip plots for all variables. From 5d3b113bb43aa19b7be5d7395be1ab075e173e01 Mon Sep 17 00:00:00 2001 From: Edo <38205413+EdoardoCostantini@users.noreply.github.com> Date: Tue, 31 Oct 2023 19:13:43 +0100 Subject: [PATCH 14/14] fix: remove duplicated lines in vignette --- vignettes/old_friends.Rmd | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vignettes/old_friends.Rmd b/vignettes/old_friends.Rmd index ee0662c4..958ae295 100644 --- a/vignettes/old_friends.Rmd +++ b/vignettes/old_friends.Rmd @@ -75,16 +75,6 @@ ggmice(imp, aes(x = hgt, group = .imp, size = .where)) + ) + theme(legend.position = "none") -# original plot -mice::densityplot(imp) -# ggmice equivalent -p <- map(vrb, ~ { - ggmice(imp, aes(x = .data[[.x]], group = .imp)) + - geom_density() -}) -wrap_plots(p, guides = "collect") & - theme(legend.position = "bottom") - # Can make the same plot mice::densityplot(imp, ~hgt) plot_density(data = imp, vrb = "hgt", panels = FALSE)