appendix.Rmd

---
title: Simulations of examples in appendices
author: Gaebler et al.
date: Tuesday, October 11, 2020
output:
  github_document:
    pandoc_args: --webtex
---

```{r setup}
library(glue)
library(magrittr)
library(tidyverse)

# Simulate in a population of 1,000,000.
pop_size <- 1e6

# Fix random seed.
set.seed(0)

# Print three digits only.
options(digits = 3)

# Set the ggplot theme.
theme_set(theme_bw(base_size = 20))

# Output results at the end of the chunk.
knitr::opts_chunk$set(results = "hold")
```

# Overview

This notebook contains simulations of the examples constructed in the proofs of
Theorem A.5 and Proposition B.1 in the appendices of _A Causal Framework for
Observational Studies of Discrimination_, complementing the proofs contained
there.

Each simulation generates a "book of life," i.e., the complete set of potential
outcomes for a population, drawn according to the joint distribution defined in
the corresponding example.

# Case 2: Subset ignorability does not imply sequential ignorability

```{r case_2}
case_2_book_of_life <- tibble(
  Y_b_1 = 1L,
  Y_w_1 = 1L,
  Z = rbinom(pop_size, 1, 1/2),
  M_b = if_else(Z == 1, 1L, rbinom(pop_size, 1, 1/2)),
  M_w = if_else(Z == 1, 1L, rbinom(pop_size, 1, 1/2)),
  M = if_else(Z == 1, M_b, M_w),
  Y = if_else(M == 1, if_else(Z == 1, Y_b_1, Y_w_1), 0L)
)

# Subset ignorability holds.
cat("## SUBSET IGNORABILITY HOLDS.\n")
case_2_book_of_life %>%
  filter(M == 1) %>%
  group_by(Z) %>%
  summarize(across(c(Y_b_1, Y_w_1), mean), .groups = "drop") %>%
  pivot_longer(
    c(Y_b_1, Y_w_1),
    names_to = "po",
    values_to = "mean"
  ) %>%
  arrange(po) %>%
  pmap_chr(
    glue,
    "For individuals with Z = { Z }, the mean of { po } is { format(mean) }.\n",
    .trim = FALSE
  ) %>%
  walk(cat)

# Sequential ignorability fails.
cat("\n## SEQUENTIAL IGNORABILITY FAILS.\n")
case_2_book_of_life %>%
  group_by(Z) %>%
  summarize(across(c(M_b, M_w), mean), .groups = "drop") %>%
  pivot_longer(
    c(M_b, M_w),
    names_to = "po",
    values_to = "mean"
  ) %>%
  arrange(po) %>%
  pmap_chr(
    glue,
    "For individuals with Z = { Z }, the mean of { po } is { format(mean) }.\n",
    .trim = FALSE
  ) %>%
  walk(cat)
```

# Case 3: Consistency of $\Delta_n$ does not imply subset ignorability holds

```{r case_3}
case_3_book_of_life <- tibble(
  Z = rbinom(pop_size, 1, 1/2),
  M = 1L,
  Y_b_1 = if_else(Z == 1, 1L, rbinom(pop_size, 1, 1/2)),
  Y_w_1 = if_else(Z == 1, 0L, rbinom(pop_size, 1, 1/2)),
  Y = if_else(M == 1, if_else(Z == 1, Y_b_1, Y_w_1), 0L)
)

# Δ_n is a consistent estimator.
cat("## Δ_n IS A CONSISTENT ESTIMATOR.\n")
SATE_M <- case_3_book_of_life %>%
  filter(M == 1) %>%
  with(mean(Y_b_1 - Y_w_1)) %>%
  format()
Delta_n <- case_3_book_of_life %>%
  filter(M == 1) %>%
  group_by(Z) %>%
  summarize(Y = mean(Y), .groups = "drop") %>%
  with(Y[Z == 1] - Y[Z == 0]) %>%
  format()
cat(glue(
  "The true SATE_M is { SATE_M } and Δ_n is { Delta_n }.\n",
  .trim = FALSE
))

# Subset ignorability holds.
cat("\n## SUBSET IGNORABILITY FAILS.\n")
case_3_book_of_life %>%
  filter(M == 1) %>%
  group_by(Z) %>%
  summarize(across(c(Y_b_1, Y_w_1), mean), .groups = "drop") %>%
  pivot_longer(
    c(Y_b_1, Y_w_1),
    names_to = "po",
    values_to = "mean"
  ) %>%
  arrange(po) %>%
  pmap_chr(
    glue,
    "For individuals with Z = { Z }, the mean of { po } is { format(mean) }.\n",
    .trim = FALSE
  ) %>%
  walk(cat)
```

<h2>Case 6: Treatment ignorability does not imply $\Delta_n$ is a consistent
estimator of the $\text{SATE}_M$</h2>

```{r case_6}
gen_case_6_book_of_life <- function(alpha) {
  tibble(
    Z = rbinom(pop_size, 1, 1/2),
    M_b = 1L,
    M_w = rbinom(pop_size, 1, 1/2),
    Y_b_1 = 1L,
    Y_w_1 = if_else(M_w == 0, rbinom(pop_size, 1, alpha), 1L),
    M = if_else(Z == 1, M_b, M_w),
    Y = if_else(M == 1, if_else(Z == 1, Y_b_1, Y_w_1), 0L)
  )
}

extract_estimator_and_estimand <- function(book_of_life) {
  book_of_life %<>%
    filter(M == 1)

  SATE_M <- book_of_life %>%
    with(mean(Y_b_1 - Y_w_1))

  Delta_n <- book_of_life %>%
    group_by(Z) %>%
    summarize(Y = mean(Y), .groups = "drop") %>%
    with(Y[Z == 1] - Y[Z == 0])

  tibble(SATE_M = SATE_M, Delta_n = Delta_n)
}

# Do a grid search over alpha between 0 and 1, and plot the results.
seq(0, 1, length.out = 101) %>%
  map_dfr(compose(extract_estimator_and_estimand, gen_case_6_book_of_life)) %>%
  mutate(alpha = seq(0, 1, length.out = 101)) %>%
  pivot_longer(cols = c(SATE_M, Delta_n)) %>%
  ggplot(aes(x = alpha, y = value, color = name)) +
  scale_color_manual(
    values = c("red", "blue"),
    labels = c(expr(Delta[n]), expr(SATE[M]))
  ) +
  geom_point() +
  labs(
    x = expr(alpha),
    y = NULL,
    color = "Quantity"
  )
```

# Proposition B.1

Since Proposition B.1 concerns DAGs, we simulate joint distribution by directly
implementing the structural causal model.

```{r proposition_B_1}
f_Z <- identity
f_Q <- identity

f_M <- function(z, q, u_M) {
  val <- (1 + z) * (((q == 1) + (z == 1 & q == 3) + (z == 0 & q == 2)) / 2)
  return(as.integer(u_M <= val))
}

f_Y <- function(z, m, q, u_Y) {
  val <- (1 + z) * ((q == 1) / 2)
  return(m * as.integer(u_Y <= val))
}

prop_b_1_book_of_life <- tibble(
  # Generate the exogenous variables.
  U_Z = rbinom(pop_size, 1, 1/2),
  U_Q = sample(1:4, pop_size, replace = TRUE),
  U_M = runif(pop_size),
  U_Y = runif(pop_size),

  # Generate the endogenous variables.
  Z = f_Z(U_Z),
  Q = f_Q(U_Q),
  M = f_M(Z, Q, U_M),
  Y = f_Y(Z, M, Q, U_Y),

  # Generate the potential outcomes.
  M_b = f_M(1L, Q, U_M),
  M_w = f_M(0L, Q, U_M),
  Y_b_1 = f_Y(1L, 1L, Q, U_Y),
  Y_w_1 = f_Y(0L, 1L, Q, U_Y)
)

# Mediator ignorability is violated.
cat("## MEDIATOR IGNORABILITY FAILS.\n")
prop_b_1_book_of_life %>%
  filter(M_b == 1) %>%
  group_by(Z, M_w) %>%
  summarize(mean = mean(Y_b_1), .groups = "drop") %>%
  pmap_chr(glue,
    "For individuals with Z = { Z }, M_b = 1, and M_w = { M_w }, ",
    "the probability that Y_b_1 = 1 is { format(mean) }.\n",
    .trim = FALSE
  ) %>%
  walk(cat)

# Subset ignorability holds.
cat("\n## SUBSET IGNORABILITY HOLDS.\n")
prop_b_1_book_of_life %>%
  filter(M == 1) %>%
  group_by(Z) %>%
  summarize(across(c(Y_b_1, Y_w_1), mean), .groups = "drop") %>%
  pivot_longer(
    c(Y_b_1, Y_w_1),
    names_to = "po",
    values_to = "mean"
  ) %>%
  arrange(po) %>%
  pmap_chr(
    glue,
    "For individuals with Z = { Z }, the mean of { po } is { format(mean) }.\n",
    .trim = FALSE
  ) %>%
  walk(cat)
```