Skip to content

Commit

Permalink
June 12 update
Browse files Browse the repository at this point in the history
  • Loading branch information
DanWeinberger committed Jun 14, 2020
1 parent 39bcb1f commit a7574f3
Show file tree
Hide file tree
Showing 20 changed files with 60,693 additions and 10 deletions.
15,341 changes: 15,341 additions & 0 deletions Archives_FluView_state_data/State_Custom_Data21.csv

Large diffs are not rendered by default.

15,393 changes: 15,393 additions & 0 deletions Archives_FluView_state_data/State_Custom_Data22.csv

Large diffs are not rendered by default.

15,445 changes: 15,445 additions & 0 deletions Archives_FluView_state_data/State_Custom_Data23.csv

Large diffs are not rendered by default.

Binary file added Data/cdc_covid_data/2020_06_12_11_50.rds
Binary file not shown.
Binary file not shown.
Binary file added Data/ilinet_state/2020_06_05_11_23.rds
Binary file not shown.
Binary file added Data/nrevss_hhs/2020_06_12_11_49.rds
Binary file not shown.
Binary file added Data/nrevss_national/2020_06_12_11_49.rds
Binary file not shown.
Binary file added Data/nrevss_state/2020_06_12_11_49.rds
Binary file not shown.
Binary file added Data/pi_mortality_state/2020_06_12_11_50.rds
Binary file not shown.
346 changes: 346 additions & 0 deletions Data/provisional_pi/provisional2019-2020_week_19.csv

Large diffs are not rendered by default.

347 changes: 347 additions & 0 deletions Data/provisional_pi/provisional2019-2020_week_20.csv

Large diffs are not rendered by default.

348 changes: 348 additions & 0 deletions Data/provisional_pi/provisional2019-2020_week_21.csv

Large diffs are not rendered by default.

349 changes: 349 additions & 0 deletions Data/provisional_pi/provisional2019-2020_week_22.csv

Large diffs are not rendered by default.

350 changes: 350 additions & 0 deletions Data/provisional_pi/provisional2019-2020_week_23.csv

Large diffs are not rendered by default.

Binary file modified outputs/NobBs.complete.iters.rds
Binary file not shown.
12,390 changes: 12,390 additions & 0 deletions outputs/national_and_state_summary.csv

Large diffs are not rendered by default.

26 changes: 21 additions & 5 deletions under_reporting.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ wk22 <-
wk22$max.date.report <- as.Date('2020-05-30')
wk22$report.date <- as.Date('2020-06-05')
wk23 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data23.csv')
wk23$max.date.report <- as.Date('2020-06-06')
wk23$report.date <- as.Date('2020-06-12')
#Read in national data
Expand All @@ -110,7 +115,7 @@ provis.list <- lapply(c('01','02','03','04','05','06','07','08','09','10','11','
})
nat.reports <- do.call('rbind.data.frame',provis.list)
all.reports <- rbind.data.frame(wk11,wk12,wk13,wk15, wk16, wk17, wk18, wk19, wk20, wk21, wk22)
all.reports <- rbind.data.frame(wk11,wk12,wk13,wk15, wk16, wk17, wk18, wk19, wk20, wk21, wk22, wk23)
all.reports$epiyr <-
Expand Down Expand Up @@ -171,6 +176,9 @@ compare.m.alt <-
compare.m.alt <-
compare.m.alt[as.numeric((compare.m.alt$report_date - compare.m.alt$death_date)) > 6,]
#Only use death data from Apr 1 onwards to account for changes in reporting during pandemic
compare.m.alt <-
compare.m.alt[compare.m.alt$death_date >=as.Date('2020-03-29'),]
#Split by state and death date
compare.m.alt.spl <-
Expand Down Expand Up @@ -455,6 +463,14 @@ prop.report.wk.iter <- sapply(res1,'[[', 'probs.samps', simplify='array')
complete.wk.iter <- apply(prop.report.wk.iter, c(1,3),function(x) cumsum(exp(x)))
dimnames(complete.wk.iter)[[1]] <- 2:(dim(complete.wk.iter)[1]+1)
saveRDS(complete.wk.iter,'./outputs/NobBs.complete.iters.rds')
#this is based on obs/exp from NobBS
# complete.prop.est.median.df <-
# cbind.data.frame(complete.prop.est.iter.df[,1], apply(complete.prop.est.iter.df[,-1],1, median))
#
# complete.wk.iter.median <- apply(complete.wk.iter, c(1,3), median)
```


Expand All @@ -477,10 +493,10 @@ abline(v=as.Date('2020-04-18'), lty=2, col='gray')
```

#As a check, let's look at the backfilling patterns for certain states
## As a check, let's look at the backfilling patterns for certain states
```{r, fig.width=6, fig.height=8}
check1.m <- melt(compare.m[,c('state','death_date','report_date', 'N_deaths')], id.vars=c('state','death_date','report_date'))
check1.m <- melt(compare.m.alt[,c('state','death_date','report_date', 'N_deaths')], id.vars=c('state','death_date','report_date'))
check1.m$time.report <- as.numeric(check1.m$report_date - check1.m$death_date)
check1.m <- check1.m[check1.m$time.report>7,]
check1.c <- acast(check1.m, state~death_date~report_date)
Expand All @@ -489,7 +505,7 @@ check2 <- check1.c[,dates1 >=as.Date('2020-01-01'),]
dates2 <- dates1[dates1 >=as.Date('2020-01-01')]
#plot.states <- c('Delaware', 'Louisiana', 'New York', 'Florida')
plot.states <- c('Delaware', 'Louisiana', 'New York', 'Florida', 'South Carolina','Indiana' )
plot.states <- c('Delaware', 'Louisiana', 'New York', 'Florida', 'South Carolina','Washington' )
par(mfrow=c(3,2))
for(i in plot.states){
check3 <- check2[i,,]
Expand All @@ -499,7 +515,7 @@ dates2 <- dates1[dates1 >=as.Date('2020-01-01')]
dates2 <- dates1[dates1 >=as.Date('2020-01-01')]
matplot(dates2 ,check3, type='l', bty='l', xaxt='n', xlab='', ylab='Provisional Count', main=i, ylim=c(0, max(check3, na.rm=T)))
axis(side=1, at=seq.Date(from=min(dates2), to=max(dates2), length.out=length(dates2)), label=dates2)
abline(v=as.Date('2020-04-18'))
abline(v=as.Date('2020-04-01'))
}
```

258 changes: 258 additions & 0 deletions under_reporting_validate.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
---
title: "Data completeness state"
author: "Dan Weinberger"
date: "4/22/2020"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(ExcessILI)
library(cdcfluview)
library(reshape2)
library(ggplot2)
library(lubridate)
library(RColorBrewer)
library(plotly)
library(MMWRweek)
library(readr)
library(rjson)
library(htmlTable)
library(RSocrata)
library(pdftools)
library(readr)
library(gsubfn)
library(INLA)
library (RCurl)
library(rjags)
library(HDInterval)
library(pbapply)
library(parallel)
```
## Backfilling
#NOTE THE DATA ARE MISSING FOR JULY-OCT 2019

```{r}
#Data from Andrew: NOTE: it looks like July-oct 2019 is missing from these data
wk11 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data11.csv')
wk11$max.date.report <- as.Date('2020-03-14')
wk11$report.date <- as.Date('2020-03-27')
wk12 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data12.csv')
wk12$max.date.report <- as.Date('2020-03-21')
wk12$report.date <- as.Date('2020-04-03')
wk13 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data13.csv')
wk13$max.date.report <- as.Date('2020-03-28')
wk13$report.date <- as.Date('2020-04-10')
wk15 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data15.csv')
wk15$max.date.report <- as.Date('2020-04-11')
wk15$report.date <- as.Date('2020-04-17')
wk16 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data16.csv')
wk16$max.date.report <- as.Date('2020-04-18')
wk16$report.date <- as.Date('2020-04-24')
wk17 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data17.csv')
wk17$max.date.report <- as.Date('2020-04-25')
wk17$report.date <- as.Date('2020-05-01')
wk18 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data18.csv')
wk18$max.date.report <- as.Date('2020-05-02')
wk18$report.date <- as.Date('2020-05-08')
wk19 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data19.csv')
wk19$max.date.report <- as.Date('2020-05-09')
wk19$report.date <- as.Date('2020-05-15')
wk20 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data20.csv')
wk20$max.date.report <- as.Date('2020-05-16')
wk20$report.date <- as.Date('2020-05-22')
wk21 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data21.csv')
wk21$max.date.report <- as.Date('2020-05-23')
wk21$report.date <- as.Date('2020-05-29')
wk22 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data22.csv')
wk22$max.date.report <- as.Date('2020-05-30')
wk22$report.date <- as.Date('2020-06-05')
wk23 <-
read.csv('./Archives_FluView_state_data/State_Custom_Data23.csv')
wk23$max.date.report <- as.Date('2020-06-06')
wk23$report.date <- as.Date('2020-06-12')
#Read in national data
provis.list <- lapply(c('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22'),
function(x){
d1 <- read.csv(paste0('./Data/provisional_pi/provisional', '2019-2020','_','week_',x,'.csv'))
names(d1) <- toupper(names(d1))
d1$week.death <- MMWRweek2Date(d1$YEAR, d1$WEEK) + days (6)
d1$max.date.report <- max(d1$week.death)
# d1$report.date <- d1$max.date.report + days(12)
d1$report.date <- mmwr_week_to_date(2020,week= as.numeric(x))+ days(12)
d1$SUB.AREA <- 'US'
d1$epiyr <- d1$YEAR
d1$epiyr[d1$WEEK<=26] <- d1$YEAR[d1$WEEK<=26] - 1
return(d1)
})
nat.reports <- do.call('rbind.data.frame',provis.list)
all.reports <- rbind.data.frame(wk11,wk12,wk13,wk15, wk16, wk17, wk18, wk19, wk20, wk21, wk22, wk23)
all.reports$epiyr <-
as.numeric(as.character(substr(all.reports$SEASON,1,4)))
all.reports$year <- all.reports$epiyr
all.reports$year[all.reports$WEEK<=26] <-
all.reports$epiyr[all.reports$WEEK<=26] +1
all.reports$week.death <-
mmwr_week_to_date(all.reports$year, all.reports$WEEK)+6
#Fix formatting for the count variables
all.reports$NUM.INFLUENZA.DEATHS <-
gsub(',','',all.reports$NUM.INFLUENZA.DEATHS)
all.reports$NUM.INFLUENZA.DEATHS <- as.numeric(as.character(all.reports$NUM.INFLUENZA.DEATHS))
all.reports$NUM.PNEUMONIA.DEATHS <-
gsub(',','',all.reports$NUM.PNEUMONIA.DEATHS)
all.reports$NUM.PNEUMONIA.DEATHS <- as.numeric(as.character(all.reports$NUM.PNEUMONIA.DEATHS))
all.reports$TOTAL.DEATHS <-
gsub(',','',all.reports$TOTAL.DEATHS)
all.reports$TOTAL.DEATHS <- as.numeric(as.character(all.reports$TOTAL.DEATHS))
nat.reports2 <-
nat.reports[, c('week.death','SUB.AREA','report.date', 'ALL.DEATHS')]
names(nat.reports2) <- c("week.death","SUB.AREA" ,"report.date","TOTAL.DEATHS")
all.reports2 <- rbind.data.frame(all.reports[,c("week.death","SUB.AREA" ,"report.date","TOTAL.DEATHS") ], nat.reports2)
```

```{r}
#Reporting delays from NobBS
delays <- readRDS('./outputs/NobBs.complete.iters.rds')
dimnames(delays)[[3]][dimnames(delays)[[3]]=='US'] <- 'United States'
#dimnames(delays)[[3]] <-
# state.abb2[match(dimnames(delays)[[3]] , state.name2)]
#states1.match <- unique()
#states2.match <- dimnames(delays)[[3]]
#commn.states <- Reduce(intersect, list(states1.match,states2.match))
#delays <- delays[,,commn.states]
delays <- delays[,1:10000,]
delays.m <- melt(delays)
names(delays.m) <- c('time.since.death','iter','state','prop')
delays.med <- apply(delays,c (1,3),median)
delays.med.m <- melt(delays.med)
delays.med.m$Var1 <- as.numeric(as.character(delays.med.m$Var1))
names(delays.med.m) <- c('weeks.since.death','state','prop.complete')
```

```{r}
#Merge in reporting delay info
all.reports2$vintage <- all.reports2$report.date
all.reports2$week_end <- all.reports2$week.death
all.reports2$n.weeks.ago <- round(as.numeric(difftime(all.reports2$vintage , all.reports2$week_end, units='weeks')))
#Merge in reporting delay
analysis.data <- merge(all.reports2,delays.med.m, by.x=c('SUB.AREA','n.weeks.ago'), by.y=c('state',"weeks.since.death"), all=T)
analysis.data <-
analysis.data[analysis.data$n.weeks.ago >= 2 ,]
```

##Estimate how many deaths will ultimately be reported
```{r}
analysis.data$estimated.deaths <-
analysis.data$TOTAL.DEATHS / analysis.data$prop.complete
comp1 <- analysis.data[, c('SUB.AREA','week_end','estimated.deaths','n.weeks.ago')]
comp1.m <- melt(comp1, id.vars=c('SUB.AREA','week_end','n.weeks.ago'))
comp1.c <-dcast(comp1.m, SUB.AREA+week_end ~ n.weeks.ago)
comp1.c <-
comp1.c[comp1.c$week_end >= as.Date('2020-03-08'),]
```

#across all states, how do the estimates change 2,3,4... weeks out, over time?

Each line on this plot represents an estimate for that date 2,3,4.. weeks over time. This shows that the estimates from 2 weeks ago are an underestimate for recent dates, but they stabilize by 3 week
```{r, fig.width=5, fig.height=5}
comp1.c.agg <- aggregate(comp1.c[,-c(1:2)], by=list('week'=comp1.c$week_end), FUN=sum, na.rm=T)
comp1.c.agg[,-c(1)] <- apply(comp1.c.agg[,-c(1)],2, function(x){
x[x==0] <- NA
return(x)
})
alpha.set=0.75
plot.cols <-
c( rgb(228/256,26/256,28/256, alpha=alpha.set),
rgb(55/256,126/256,184/256, alpha=alpha.set),
rgb(77/256,175/256,74/256, alpha=alpha.set),
rgb(152/256,78/256,163/256, alpha=alpha.set),
rgb(55/256,127/256,0/256, alpha=alpha.set)
)
comp1.c.agg <- comp1.c.agg[,c(1:6)]
matplot(comp1.c.agg$week, comp1.c.agg[,-1], pch=c(16:20),bty='l', ylim=c(0,max(comp1.c.agg[,-1], na.rm=T) ), xaxt='n', col=plot.cols, ylab='Deaths adjusted for reporting delays', xlab='')
axis(side=1, at=as.Date(c('2020-03-01','2020-04-01','2020-05-01','2020-06-01')) , label=c('2020-03-01','2020-04-01','2020-05-01','2020-06-01'))
legend('bottomleft', legend=c('2 weeks', '3 weeks', '4 weeks', '5 weeks', '6 weeks'), ncol=2, pch=c(16:20),box.lty=0,inset=0.01, col=plot.cols)
```

## how much do thing increase N weeks out compared to 2 weeks, across all states
```{r}
comp1.c.agg.change <-t(apply(comp1.c.agg[,-1], 1, function(x){
x<- x/x[1]
return(x)
})
)
matplot(comp1.c.agg.change[,-1], type='l',xlab='Week of death', ylab='Relative change from deaths reported fater 2 weeks')
abline(h=1)
```

## look by state
There are a handful of states where the etsimates from 2 weeks ago are a severe underestimate, but stabilize after 3 weeks
```{r}
comp1.c.change <-t(apply(comp1.c[,-c(1:2)], 1, function(x){
x<- x/x[1]
return(x)
})
)
comp1.c.change <-
cbind.data.frame(comp1.c[,c(1:2)],comp1.c.change)
matplot(comp1.c.change$week_end,
comp1.c.change[,c(4:9)], type='p',xlab='Week of death', ylab='Relative change from deaths reported fater 2 weeks')
abline(h=1)
```


```{r}
```





Loading

0 comments on commit a7574f3

Please sign in to comment.