amices · EdoardoCostantini · Oct 24, 2023 · Oct 24, 2023 · Oct 24, 2023 · Oct 24, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -27,6 +27,7 @@ Imports:
     magrittr,
     mice,
     purrr,
+    reshape2,
     rlang,
     stats,
     stringr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ export(bwplot)
 export(densityplot)
 export(ggmice)
 export(plot_corr)
+export(plot_density)
 export(plot_flux)
 export(plot_pattern)
 export(plot_pred)

diff --git a/R/plot_density.R b/R/plot_density.R
@@ -0,0 +1,156 @@
+#' Plot the density of observed and imputed values
+#'
+#' @param data An object of class [mice::mids].
+#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all".
+#' @param panels Logical, vector of length 1 specifying whether the density plots should be broken into panels (TRUE) or not (FALSE)
+#'
+#' @return An object of class [ggplot2::ggplot].
+#'
+#' @examples
+#' imp <- mice::mice(mice::nhanes, print = FALSE)
+#' plot_density(data = imp)
+#' @export
+plot_density <- function(data, vrb = "all", panels = FALSE) {
+    verify_data(data, imp = TRUE)
+    if (is.null(data$chainMean) && is.null(data$chainVar)) {
+        cli::cli_abort("No convergence diagnostics found", call. = FALSE)
+    }
+
+    # List all variables with missing values
+    vrb <- substitute(vrb)
+    varlist <- colnames(data$where[, colSums(data$where) != 0])
+
+    # Select variables
+    if (as.character(vrb)[1] == "all") {
+        vrb <- varlist
+    } else {
+        vrb <- names(dplyr::select(data$data, {{ vrb }}))
+    }
+    if (any(vrb %nin% varlist)) {
+        cli::cli_inform(
+            c(
+                "Density plot could not be produced for variable(s):",
+                " " = paste(vrb[which(vrb %nin% varlist)], collapse = ", "),
+                "x" = "No density plot available."
+            )
+        )
+        if (any(vrb %in% varlist)) {
+            vrb <- vrb[which(vrb %in% varlist)]
+        } else {
+            cli::cli_abort(c(
+                "x" = "None of the variables are imputed.",
+                "No plots can be produced."
+            ))
+        }
+    }
+
+    # Reshape the mids object to be the best ggplot data possible
+    imps_ggplot <- reshape_mids(data)
+
+    # Keep only observed caases
+    imps_ggplot_obs <- imps_ggplot %>%
+      dplyr::filter(
+        .imp == 0,
+        .data$miss == FALSE
+      )
+
+    # Keep only imputed values
+    imps_ggplot_imps <- imps_ggplot %>%
+        dplyr::filter(
+            .imp != 0,
+            .data$miss == TRUE
+        )
+
+    # Stack observed and imputed
+    imps_ggplot <- rbind(imps_ggplot_obs, imps_ggplot_imps)
+
+    # Create a grouping variable for the densities
+    imps_ggplot$group <- paste0(imps_ggplot$.imp, imps_ggplot$miss)
+
+    # Make the grouping variable a factor with meaningful labels
+    imps_ggplot$group <- factor(
+        imps_ggplot$group,
+        levels = unique(imps_ggplot$group),
+        labels = c(
+            "Observed data",
+            paste0("Imputation chain ", seq(1:(length(unique(imps_ggplot$group)) - 1)))
+        )
+    )
+
+    # Create empty list
+    plot_list <- list()
+
+    # Make plot for 1 variable at the time
+    for (i in 1:length(vrb)) {
+        # Active data for plot
+        imps_ggplot_active <- imps_ggplot %>%
+            dplyr::filter(.data$variable %in% vrb[i])
+
+        # Base plot
+        plot_list[[i]] <- imps_ggplot_active %>%
+            ggplot2::ggplot(
+                ggplot2::aes(
+                    x = value,
+                    color = .data$group
+                )
+            ) +
+            ggplot2::geom_density(
+                adjust = 1
+            )
+
+        # Panel structure
+        if (panels == TRUE) {
+            plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid(
+                cols = ggplot2::vars(.data$group),
+                scales = "free",
+                switch = "y"
+            )
+        } else {
+            plot_list[[i]] <- plot_list[[i]] + ggplot2::facet_grid(
+                cols = ggplot2::vars(.data$variable),
+                scales = "free",
+                switch = "y"
+            )
+        }
+
+        # Cosmetics
+        plot_list[[i]] <- plot_list[[i]] +
+            ggplot2::scale_color_manual(
+                values = c(
+                    "#006CC2B3",
+                    rep("#B61A51B3", length(unique(imps_ggplot$.imp)) - 1)
+                )
+            ) +
+            ggplot2::scale_x_continuous(
+                breaks = seq(
+                    from = min(imps_ggplot_active$value),
+                    to = max(imps_ggplot_active$value),
+                    length = 5
+                ),
+                limits = c(
+                    min(imps_ggplot_active$value) - stats::sd(imps_ggplot_active$value),
+                    max(imps_ggplot_active$value) + stats::sd(imps_ggplot_active$value)
+                )
+            ) +
+            ggplot2::labs(x = vrb[i]) +
+            theme_mice() +
+            ggplot2::theme(
+                strip.text.x = ggplot2::element_blank(),
+                strip.background = ggplot2::element_blank(),
+                strip.placement = "outside",
+                strip.switch.pad.wrap = ggplot2::unit(0, "cm"),
+                legend.position = "none"
+            )
+    }
+
+    # Collect plot together with patchwork
+    if(length(plot_list) > 1){
+        if (panels == TRUE) {
+            patchwork::wrap_plots(plot_list, nrow = min(c(length(vrb), 5)))
+        } else {
+            patchwork::wrap_plots(plot_list, ncol = min(c(length(vrb), 5)))
+        }
+    } else {
+        plot_list[[1]]
+    }
+}
diff --git a/R/utils.R b/R/utils.R
@@ -104,3 +104,73 @@ verify_data <- function(data,
     }
   }
 }
+
+#' Utils function to reshape mids object into a super-long format for density plots
+#'
+#' @param data A mids object.
+#' @details
+#' 
+#' ggplot works best with long format data. This function takes a mids object and returns a super-long format version of the mids object that is easy to process with ggplot
+#' This super-long format has 5 columns:
+#' - .imp: reference imputation data set (same as in mids object)
+#' - .id: observation id (same as in mids object)
+#' - miss: response vector (logical vector indicating whether the case was missing (1) or not (0))
+#' - variable: vector of names of the variables in the original data set
+#' - value: the data point (observed, na, or imputation value)
+#' 
+#' This structure makes it easy to group, color, filter, and facet by the two most important factors for density plots: missingness and variables.
+#'
+#' @return data.frame in super-long format
+#' 
+#' @examples
+#' imp <- mice::mice(mice::nhanes, print = FALSE)
+#' reshape_mids(data = imp)
+#' @keywords internal
+#' @noRd
+reshape_mids <- function(data) {
+
+  # Create an empty list to store intermediate objects
+  shelf <- list()
+
+  # Extract imputations in long format
+  imps <- data.frame(mice::complete(data, "long", include = TRUE))
+
+  # Define column names for melting (depends on mice long format column names)
+  id_vars <- colnames(imps)[1:2]
+
+  # Define the response matrix
+  Rmat <- data$where
+
+  # Loop over the variables in the original data
+  for (j in 1:ncol(Rmat)) {
+
+    # Check if there are missing values
+    if (any(Rmat[, j])) {
+
+      # What variable are we considering
+      J <- colnames(Rmat)[j]
+
+      # Keep only the .imp identifier and the variable value
+      active_data <- imps[, c(id_vars, J)]
+
+      # Force active variable to numeric
+      active_data[, J] <- as.numeric(active_data[, J])
+
+      # attach the response indicator
+      active_data <- cbind(
+        active_data,
+        miss = Rmat[, J]
+      )
+
+      # Melt values
+      ad_melt <- reshape2::melt(active_data, id.vars = c(id_vars, "miss"))
+
+      # Store the result
+      shelf[[j]] <- ad_melt
+    }
+  }
+
+  # Combine the results from the many variables
+  do.call(rbind, shelf)
+
+}
diff --git a/man/plot_density.Rd b/man/plot_density.Rd
diff --git a/tests/testthat/test-plot_density.R b/tests/testthat/test-plot_density.R
@@ -0,0 +1,24 @@
+# create test objects
+dat <- mice::nhanes
+imp <- mice::mice(dat, printFlag = FALSE)
+
+# tests
+test_that("plot_density creates ggplot object", {
+    expect_s3_class(plot_density(imp), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = "bmi"), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = "all"), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = bmi), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp")), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = c(bmi, hyp)), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = c("bmi"), panels = TRUE), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = c("bmi", "hyp"), panels = TRUE), "ggplot")
+    expect_s3_class(plot_density(imp, vrb = "all", panels = TRUE), "ggplot")
+})
+
+test_that("plot_density returns error with incorrect argument(s)", {
+    expect_error(plot_density(dat))
+    expect_error(plot_density(imp, vrb = "test"))
+    expect_error(plot_density(imp, vrb = "age"))
+    expect_message(plot_density(imp, vrb = c("age", "bmi")))
+})
diff --git a/tests/testthat/test-reshape_mids.R b/tests/testthat/test-reshape_mids.R
@@ -0,0 +1,14 @@
+# create test objects
+dat <- mice::nhanes
+m <- 3
+imp <- mice::mice(dat, m = m, printFlag = FALSE)
+p <- sum(colSums(imp$where) > 0)
+
+# tests
+test_that("returns correct output", {
+    expect_s3_class(reshape_mids(imp), "data.frame")
+    expect_equal(
+        dim(reshape_mids(imp)),
+        c(nrow(dat) * (m + 1) * p, 5)
+    )
+})
diff --git a/vignettes/old_friends.Rmd b/vignettes/old_friends.Rmd
@@ -74,6 +74,23 @@ ggmice(imp, aes(x = hgt, group = .imp, size = .where)) +
     guide = "none"
   ) +
   theme(legend.position = "none")
+
+# Can make the same plot
+mice::densityplot(imp, ~hgt)
+plot_density(data = imp, vrb = "hgt", panels = FALSE)
+
+# Can be broken down by panels
+mice::densityplot(imp, ~ hgt | .imp)
+plot_density(data = imp, vrb = "hgt", panels = TRUE)
+
+# Can plot arbitrary variables 
+plot_density(data = imp, vrb = c("wgt", "bmi", "phb"), panels = FALSE)
+plot_density(data = imp, vrb = c("wgt", "bmi", "phb"), panels = TRUE)
+
+# Can plot all variables
+mice::densityplot(imp)
+plot_density(data = imp)
+
 ```
 
 # `fluxplot`
@@ -217,6 +234,13 @@ p <- map(vrb, ~ {
 })
 wrap_plots(p, guides = "collect") &
   theme(legend.position = "bottom")
+
+# Can make the same plot
+plot_density(data = imp)
+
+# And if needed can break down by data set
+plot_density(data = imp, panels = TRUE)
+
 ```
 
 Display strip plots for all variables.
-Original file line number
+Diff line change
@@ Expand Up / @@ -27,6 +27,7 @@ Imports: @@
         magrittr,
         mice,
         purrr,
+        reshape2,
         rlang,
         stats,
         stringr,
@@ Expand Down @@