Skip to content

Commit

Permalink
push tests and data
Browse files Browse the repository at this point in the history
  • Loading branch information
ottip committed Jul 13, 2022
1 parent 9429510 commit 1f3bde8
Show file tree
Hide file tree
Showing 10 changed files with 858,318 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
30,241 changes: 30,241 additions & 0 deletions Data/No_pharma_cov/core_table_droprace_filled.csv

Large diffs are not rendered by default.

30,241 changes: 30,241 additions & 0 deletions Data/No_pharma_cov/core_table_filled.csv

Large diffs are not rendered by default.

368,641 changes: 368,641 additions & 0 deletions Data/No_pharma_cov/suppl_6mo_table_filled.csv

Large diffs are not rendered by default.

30,241 changes: 30,241 additions & 0 deletions Data/core_table_droprace_filled.csv

Large diffs are not rendered by default.

30,241 changes: 30,241 additions & 0 deletions Data/core_table_filled.csv

Large diffs are not rendered by default.

368,641 changes: 368,641 additions & 0 deletions Data/suppl_6mo_table_filled.csv

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions Example_logistic_reg_agg_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
library(tidyverse)
set.seed(5)

# Simulate 200 individual-level records: two categorical covariates and a
# binary response. The draws happen in column order, which fixes the random
# stream for the seed above.
n_obs <- 200
df <- tibble(
  Gender = factor(
    sample(c("m", "f"), size = n_obs, replace = TRUE, prob = c(0.6, 0.4))
  ),
  Age_Group = factor(
    sample(
      c("[<30]", "[30-65]", "[65+]"),
      size = n_obs,
      replace = TRUE,
      prob = c(0.3, 0.6, 0.1)
    )
  ),
  Response = rbinom(n = n_obs, size = 1, prob = 0.2)
)
df

# Logistic regression on the individual-level data (one row per record).
model1 <- glm(
  Response ~ Gender + Age_Group,
  data = df,
  family = binomial("logit")
)
summary(model1)

# Collapse to one row per Gender x Age_Group cell, keeping the number of
# records (Impressions), the number of positive responses, and their ratio.
df_agg <- df %>%
  group_by(Gender, Age_Group) %>%
  summarise(Impressions = n(), Responses = sum(Response)) %>%
  ungroup() %>%
  mutate(RR = Responses / Impressions)
df_agg

# A two-column (successes, failures) response needs the failure count too.
df_agg <- df_agg %>%
  mutate(No_Responses = Impressions - Responses)
df_agg

# Same model fitted to the aggregated counts via cbind(successes, failures).
m3 <- glm(
  cbind(Responses, No_Responses) ~ Gender + Age_Group,
  data = df_agg,
  family = binomial("logit")
)
summary(m3)
44 changes: 42 additions & 2 deletions PCV_analysis_with_pharma.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,49 @@ knitr::opts_chunk$set(echo = TRUE)
```{r}
library(table1)
library(dplyr)
library(ggplot2)
library(GGally)
```

```{r}
### Load the data:
# CSV inputs read relative to the project root (./Data).
core_table <- read.csv("./Data/core_table_filled.csv")
core_table_norace <- read.csv("./Data/core_table_droprace_filled.csv")
# NOTE(review): supp_table_filled is assigned twice. The second read.csv()
# overwrites the first, and its path points into a user's home directory, so
# this chunk only runs on a machine where both files exist. Confirm which
# source is intended and drop the other.
supp_table_filled <- read.csv("./Data/suppl_table_filled.csv")
supp_table_filled <- read.csv("~/Documents/Data_pharma/suppl_table_filled.csv")
supp_table_6mo_filled <- read.csv("./Data/suppl_6mo_table_filled.csv")
```

## Check that individuals are the same across dataframes
```{r}
# Sum the `count` column of every loaded table so the totals can be compared
# side by side; they should agree if the tables cover the same individuals.
# The totals are shown as a named vector because print("\n") displays the
# escaped string "\n" rather than emitting a line break, and the original
# label was printed without any value attached to it.
n_individuals_by_table <- c(
  core_table = sum(core_table$count),
  core_table_norace = sum(core_table_norace$count),
  supp_table_filled = sum(supp_table_filled$count),
  supp_table_6mo_filled = sum(supp_table_6mo_filled$count)
)
cat("Total number of individuals:\n")
print(n_individuals_by_table)
```

## Exploratory analysis: descriptive stats
```{r}
# Share of all individuals in each (covariate level, PCV_combined) cell.
# The denominator is the grand total, so proportions are relative to the
# whole table rather than to each covariate level.
Tot_ind <- sum(supp_table_filled$count)

# `.groups = "drop"` returns ungrouped results; without it summarise() keeps
# the first grouping variable and emits a regrouping message.
vax_by_age <- supp_table_filled %>%
  group_by(age, PCV_combined) %>%
  summarise(prop = sum(count) / Tot_ind, .groups = "drop")

vax_by_com <- supp_table_filled %>%
  group_by(comorbidities, PCV_combined) %>%
  summarise(prop = sum(count) / Tot_ind, .groups = "drop")
```

## Plot bar charts of vaccination proportions by age and by comorbidities
```{r}
# Dodged bar chart of the precomputed proportions, one bar per PCV_combined
# value within each age level. geom_col() draws bars whose heights are the
# y values already in the data.
ggplot(
  data = vax_by_age,
  mapping = aes(x = age, y = prop, fill = PCV_combined)
) +
  geom_col(position = "dodge")

# Same chart, split by comorbidities instead of age.
ggplot(
  data = vax_by_com,
  mapping = aes(x = comorbidities, y = prop, fill = PCV_combined)
) +
  geom_col(position = "dodge")
```




## First step is to create another column of no response
```{r}
### Prepare the data and do some exploration
Expand All @@ -35,7 +68,6 @@ data_comb<-merge(data1,data2,by=c("year_month","age","patient_gender_code","race

```{r}
## Exploratory analysis of the dataset:
print(paste0("Total number of individuals: ",sum(data$count)))
print(paste0("PCV13 vaccine coverage: ",sum(data$count[data$PCV_combined=="PCV13_only"])/sum(data$count)))
print(paste0("PPSV23 vaccine coverage: ",sum(data$count[data$PCV_combined=="PPSV23_10yrs"])/sum(data$count)))
print(paste0("PCV13 and PPSV23 vaccine coverage: ",sum(data$count[data$PCV_combined=="PCV13_PPSV23_10yrs"])/sum(data$count)))
Expand All @@ -58,6 +90,14 @@ m1<-glm(cbind(count.x, count.y) ~ age+patient_gender_code+race_code+PCV_combined
#summary(m1)
m1.ci<-confint(m1)
```
## Check collinearity among the covariates using a pairwise correlation matrix
```{r}
# Take columns 2 through 10 of the merged table and draw every pairwise
# panel between them.
# NOTE(review): the positions are hard-coded; presumably these are the
# covariate columns. Verify against the column order of data_comb.
X <- data_comb[, seq(2, 10)]
ggpairs(data = X)
```



```{r}
m1.table <- cbind(coef(m1),m1.ci)
Expand Down
8 changes: 8 additions & 0 deletions Tests.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Look up one covariate stratum in `data`: every condition below must hold
# for a row to be returned.
is_target_stratum <- data$year_month == "2021-12" &
  data$age == "1" &
  data$patient_gender_code == "F" &
  data$race_code == "U" &
  data$PCV_combined == "U" &
  data$flu_vacc == "FALSE" &
  data$zoster_vacc == "FALSE" &
  data$bmi_30_plus == "FALSE" &
  data$comorbidities == "FALSE" &
  data$income_est_mod == "U"
data[is_target_stratum, ]


# Same stratum in supp_table_filled, additionally excluding the
# "non_resp_severe" and "ICU_crit_care" values of COVID_severity.
is_target_stratum_supp <- supp_table_filled$year_month == "2021-12" &
  supp_table_filled$age == "1" &
  supp_table_filled$patient_gender_code == "F" &
  supp_table_filled$race_code == "U" &
  supp_table_filled$PCV_combined == "U" &
  supp_table_filled$flu_vacc == "FALSE" &
  supp_table_filled$zoster_vacc == "FALSE" &
  supp_table_filled$bmi_30_plus == "FALSE" &
  supp_table_filled$comorbidities == "FALSE" &
  supp_table_filled$income_est_mod == "U" &
  supp_table_filled$COVID_severity != "non_resp_severe" &
  supp_table_filled$COVID_severity != "ICU_crit_care"
supp_table_filled[is_target_stratum_supp, ]

0 comments on commit 1f3bde8

Please sign in to comment.