Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
ottiP authored Jul 11, 2022
0 parents commit 9429510
Show file tree
Hide file tree
Showing 3 changed files with 424 additions and 0 deletions.
205 changes: 205 additions & 0 deletions PCV_analysis.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
---
title: "PCV_analysis"
output: html_document
---

```{r setup, include=FALSE}
# Show each chunk's source code in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
```

## Load required R packages
```{r}
library(table1)
library(dplyr)
```

```{r}
### Load the data:
# Four pre-aggregated CSV tables are read from the Data/ folder next to this
# document. Only core_table and supp_table_filled are referenced again in the
# visible part of this document.
core_table <- utils::read.csv(file = "./Data/core_table_filled.csv")
core_table_norace <- utils::read.csv(
  file = "./Data/core_table_droprace_filled.csv"
)
supp_table_filled <- utils::read.csv(file = "./Data/suppl_table_filled.csv")
supp_table_6mo_filled <- utils::read.csv(
  file = "./Data/suppl_6mo_table_filled.csv"
)
```

## First step is to create another column of no response
```{r}
### Prepare the data and do some exploration
# Baseline demographic characteristics (Table 1)
# Convert the stratification columns to factors using the supplementary
# table's OWN values. core_table is read from a different file, so copying its
# columns in here would either fail on a row-count mismatch or be silently
# recycled/misaligned against the rows of supp_table_filled.
stopifnot(
  all(c("age", "patient_gender_code") %in% names(supp_table_filled))
)
supp_table_filled$age <- as.factor(supp_table_filled$age)
supp_table_filled$patient_gender_code <-
  as.factor(supp_table_filled$patient_gender_code)

# `%in%` never yields NA, so rows with a missing severity are dropped rather
# than turned into all-NA rows as they would be with `==`.
data <- supp_table_filled[
  supp_table_filled$COVID_severity %in% c("non_severe", "resp_severe"),
]
data1 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "non_severe",
]
data2 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "resp_severe",
]

# Pair the two outcome groups stratum by stratum. Columns not used as keys get
# the default merge() suffixes: count.x comes from data1 (non_severe) and
# count.y from data2 (resp_severe). Strata missing from either group are
# dropped by this inner join.
data_comb <- merge(
  data1, data2,
  by = c(
    "year_month", "age", "patient_gender_code", "race_code", "PCV_combined",
    "flu_vacc", "zoster_vacc", "bmi_30_plus", "comorbidities",
    "income_est_mod"
  )
)
```

```{r}
## Exploratory analysis of the dataset:
# Each figure is the share of the total count that falls in one category.
# Wrapped in local() so the helper and the total do not leak into the
# workspace used by the later chunks.
local({
  total <- sum(data$count)

  # which() discards NA comparisons, so a missing category value cannot turn
  # the whole share into NA.
  share <- function(is_member) {
    sum(data$count[which(is_member)]) / total
  }

  print(paste0("Total number of individuals: ", total))
  print(paste0("PCV13 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_only")))
  print(paste0("PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PPSV23_10yrs")))
  print(paste0("PCV13 and PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_PPSV23_10yrs")))
  print(paste0("Unknown PCV vaccine coverage: ",
               share(data$PCV_combined == "U")))
  print(paste0("Flu vaccine: ", share(data$flu_vacc == "TRUE")))
  # Label corrected from "Zooster" to "Zoster".
  print(paste0("Zoster vaccine: ", share(data$zoster_vacc == "TRUE")))
})
```

## Run the first logistic regression analysis looking at outcome: severe respiratory outcome for Covid-19 versus non-severe outcomes
```{r}
## Turn the categorical predictors into factors, then pick the reference
## levels: "W" for race (White) and "U" for the pneumococcal vaccine status.
col_names <- c(
  "patient_gender_code", "race_code", "income_est_mod", "PCV_combined"
)
data_comb[col_names] <- lapply(
  col_names,
  function(column) factor(data_comb[[column]])
)
data_comb[["race_code"]] <- relevel(data_comb[["race_code"]], ref = "W")
data_comb[["PCV_combined"]] <- relevel(data_comb[["PCV_combined"]], ref = "U")
```

```{r}
# Binomial logistic regression on the paired stratum counts.
# NOTE(review): with a two-column response glm() treats the first column as
# successes and the second as failures. count.x comes from data1 and count.y
# from data2 of the merge that built data_comb, so the fitted odds are for the
# data1 outcome -- confirm this is the intended direction of the odds ratios.
m1 <- glm(
  cbind(count.x, count.y) ~
    age + patient_gender_code + race_code + PCV_combined +
    flu_vacc + zoster_vacc + bmi_30_plus + comorbidities + income_est_mod,
  family = binomial(link = "logit"),
  data = data_comb
)
# summary(m1)
# Confidence intervals for the coefficients (still on the log-odds scale).
m1.ci <- confint(m1)
```
```{r}
# Combine the point estimates with their confidence limits and exponentiate,
# moving everything from the log-odds scale to the odds(-ratio) scale.
m1.table <- cbind(coef(m1), m1.ci)
colnames(m1.table) <- c("estimate", "lower", "upper")
m1.table <- exp(m1.table)
# View() needs an interactive session, which is not available while the
# document is being knitted; print the table into the report in that case.
if (interactive()) {
  View(m1.table)
} else {
  print(m1.table)
}
```


## Second part of the analysis: restrict to people with severe non-respiratory vs non-severe outcomes for Covid-19
```{r}
### Prepare the data and do some exploration
# Baseline demographic characteristics (Table 1)
# Convert the stratification columns to factors using the supplementary
# table's OWN values. core_table is read from a different file, so copying its
# columns in here would either fail on a row-count mismatch or be silently
# recycled/misaligned against the rows of supp_table_filled.
stopifnot(
  all(c("age", "patient_gender_code") %in% names(supp_table_filled))
)
supp_table_filled$age <- as.factor(supp_table_filled$age)
supp_table_filled$patient_gender_code <-
  as.factor(supp_table_filled$patient_gender_code)

# `%in%` never yields NA, so rows with a missing severity are dropped rather
# than turned into all-NA rows as they would be with `==`.
data <- supp_table_filled[
  supp_table_filled$COVID_severity %in% c("non_severe", "non_resp_severe"),
]
data1 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "non_severe",
]
data2 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "non_resp_severe",
]

# Pair the two outcome groups stratum by stratum. Columns not used as keys get
# the default merge() suffixes: count.x comes from data1 (non_severe) and
# count.y from data2 (non_resp_severe). Strata missing from either group are
# dropped by this inner join.
data_comb <- merge(
  data1, data2,
  by = c(
    "year_month", "age", "patient_gender_code", "race_code", "PCV_combined",
    "flu_vacc", "zoster_vacc", "bmi_30_plus", "comorbidities",
    "income_est_mod"
  )
)
```

```{r}
## Exploratory analysis of the dataset:
# Each figure is the share of the total count that falls in one category.
# Wrapped in local() so the helper and the total do not leak into the
# workspace used by the later chunks.
local({
  total <- sum(data$count)

  # which() discards NA comparisons, so a missing category value cannot turn
  # the whole share into NA.
  share <- function(is_member) {
    sum(data$count[which(is_member)]) / total
  }

  print(paste0("Total number of individuals: ", total))
  print(paste0("PCV13 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_only")))
  print(paste0("PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PPSV23_10yrs")))
  print(paste0("PCV13 and PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_PPSV23_10yrs")))
  print(paste0("Unknown PCV vaccine coverage: ",
               share(data$PCV_combined == "U")))
  print(paste0("Flu vaccine: ", share(data$flu_vacc == "TRUE")))
  # Label corrected from "Zooster" to "Zoster".
  print(paste0("Zoster vaccine: ", share(data$zoster_vacc == "TRUE")))
})
```

## Run the second logistic regression analysis looking at outcome: severe non-respiratory outcome for Covid-19 versus non-severe outcome
```{r}
## Turn the categorical predictors into factors, then pick the reference
## levels: "W" for race (White) and "U" for the pneumococcal vaccine status.
col_names <- c(
  "patient_gender_code", "race_code", "income_est_mod", "PCV_combined"
)
data_comb[col_names] <- lapply(
  col_names,
  function(column) factor(data_comb[[column]])
)
data_comb[["race_code"]] <- relevel(data_comb[["race_code"]], ref = "W")
data_comb[["PCV_combined"]] <- relevel(data_comb[["PCV_combined"]], ref = "U")
```

```{r}
# Binomial logistic regression on the paired stratum counts.
# NOTE(review): with a two-column response glm() treats the first column as
# successes and the second as failures. count.x comes from data1 and count.y
# from data2 of the merge that built data_comb, so the fitted odds are for the
# data1 outcome -- confirm this is the intended direction of the odds ratios.
m2 <- glm(
  cbind(count.x, count.y) ~
    age + patient_gender_code + race_code + PCV_combined +
    flu_vacc + zoster_vacc + bmi_30_plus + comorbidities + income_est_mod,
  family = binomial(link = "logit"),
  data = data_comb
)
# summary(m2)
# Confidence intervals for the coefficients (still on the log-odds scale).
m2.ci <- confint(m2)
```

```{r}
# Combine the point estimates with their confidence limits and exponentiate,
# moving everything from the log-odds scale to the odds(-ratio) scale.
m2.table <- cbind(coef(m2), m2.ci)
colnames(m2.table) <- c("estimate", "lower", "upper")
m2.table <- exp(m2.table)
# View() needs an interactive session, which is not available while the
# document is being knitted; print the table into the report in that case.
if (interactive()) {
  View(m2.table)
} else {
  print(m2.table)
}
```

## Third part of the analysis: restrict to people with ICU/critical care vs non-severe outcomes for Covid-19
```{r}
### Prepare the data and do some exploration
# Baseline demographic characteristics (Table 1)
# Convert the stratification columns to factors using the supplementary
# table's OWN values. core_table is read from a different file, so copying its
# columns in here would either fail on a row-count mismatch or be silently
# recycled/misaligned against the rows of supp_table_filled.
stopifnot(
  all(c("age", "patient_gender_code") %in% names(supp_table_filled))
)
supp_table_filled$age <- as.factor(supp_table_filled$age)
supp_table_filled$patient_gender_code <-
  as.factor(supp_table_filled$patient_gender_code)

# `%in%` never yields NA, so rows with a missing severity are dropped rather
# than turned into all-NA rows as they would be with `==`.
data <- supp_table_filled[
  supp_table_filled$COVID_severity %in% c("non_severe", "ICU_crit_care"),
]
data1 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "non_severe",
]
data2 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "ICU_crit_care",
]

# Pair the two outcome groups stratum by stratum. Columns not used as keys get
# the default merge() suffixes: count.x comes from data1 (non_severe) and
# count.y from data2 (ICU_crit_care). Strata missing from either group are
# dropped by this inner join.
data_comb <- merge(
  data1, data2,
  by = c(
    "year_month", "age", "patient_gender_code", "race_code", "PCV_combined",
    "flu_vacc", "zoster_vacc", "bmi_30_plus", "comorbidities",
    "income_est_mod"
  )
)
```

```{r}
## Exploratory analysis of the dataset:
# Each figure is the share of the total count that falls in one category.
# Wrapped in local() so the helper and the total do not leak into the
# workspace used by the later chunks.
local({
  total <- sum(data$count)

  # which() discards NA comparisons, so a missing category value cannot turn
  # the whole share into NA.
  share <- function(is_member) {
    sum(data$count[which(is_member)]) / total
  }

  print(paste0("Total number of individuals: ", total))
  print(paste0("PCV13 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_only")))
  print(paste0("PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PPSV23_10yrs")))
  print(paste0("PCV13 and PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_PPSV23_10yrs")))
  print(paste0("Unknown PCV vaccine coverage: ",
               share(data$PCV_combined == "U")))
  print(paste0("Flu vaccine: ", share(data$flu_vacc == "TRUE")))
  # Label corrected from "Zooster" to "Zoster".
  print(paste0("Zoster vaccine: ", share(data$zoster_vacc == "TRUE")))
})
```

## Run the third logistic regression analysis looking at outcome: ICU/critical care outcome for Covid-19 versus non-severe outcome
```{r}
## Turn the categorical predictors into factors, then pick the reference
## levels: "W" for race (White) and "U" for the pneumococcal vaccine status.
col_names <- c(
  "patient_gender_code", "race_code", "income_est_mod", "PCV_combined"
)
data_comb[col_names] <- lapply(
  col_names,
  function(column) factor(data_comb[[column]])
)
data_comb[["race_code"]] <- relevel(data_comb[["race_code"]], ref = "W")
data_comb[["PCV_combined"]] <- relevel(data_comb[["PCV_combined"]], ref = "U")
```

```{r}
# Binomial logistic regression on the paired stratum counts.
# NOTE(review): with a two-column response glm() treats the first column as
# successes and the second as failures. count.x comes from data1 and count.y
# from data2 of the merge that built data_comb, so the fitted odds are for the
# data1 outcome -- confirm this is the intended direction of the odds ratios.
m3 <- glm(
  cbind(count.x, count.y) ~
    age + patient_gender_code + race_code + PCV_combined +
    flu_vacc + zoster_vacc + bmi_30_plus + comorbidities + income_est_mod,
  family = binomial(link = "logit"),
  data = data_comb
)
# summary(m3)
# Confidence intervals for the coefficients (still on the log-odds scale).
m3.ci <- confint(m3)
```

```{r}
# Combine the point estimates with their confidence limits and exponentiate,
# moving everything from the log-odds scale to the odds(-ratio) scale.
m3.table <- cbind(coef(m3), m3.ci)
colnames(m3.table) <- c("estimate", "lower", "upper")
m3.table <- exp(m3.table)
# View() needs an interactive session, which is not available while the
# document is being knitted; print the table into the report in that case.
if (interactive()) {
  View(m3.table)
} else {
  print(m3.table)
}
```

## Fourth part of the analysis: restrict to people with ICU/critical care vs severe respiratory outcomes for Covid-19
```{r}
### Prepare the data and do some exploration
# Baseline demographic characteristics (Table 1)
# Convert the stratification columns to factors using the supplementary
# table's OWN values. core_table is read from a different file, so copying its
# columns in here would either fail on a row-count mismatch or be silently
# recycled/misaligned against the rows of supp_table_filled.
stopifnot(
  all(c("age", "patient_gender_code") %in% names(supp_table_filled))
)
supp_table_filled$age <- as.factor(supp_table_filled$age)
supp_table_filled$patient_gender_code <-
  as.factor(supp_table_filled$patient_gender_code)

# `%in%` never yields NA, so rows with a missing severity are dropped rather
# than turned into all-NA rows as they would be with `==`.
data <- supp_table_filled[
  supp_table_filled$COVID_severity %in% c("ICU_crit_care", "resp_severe"),
]
data1 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "ICU_crit_care",
]
data2 <- supp_table_filled[
  supp_table_filled$COVID_severity %in% "resp_severe",
]

# Pair the two outcome groups stratum by stratum. Columns not used as keys get
# the default merge() suffixes: count.x comes from data1 (ICU_crit_care) and
# count.y from data2 (resp_severe). Strata missing from either group are
# dropped by this inner join.
data_comb <- merge(
  data1, data2,
  by = c(
    "year_month", "age", "patient_gender_code", "race_code", "PCV_combined",
    "flu_vacc", "zoster_vacc", "bmi_30_plus", "comorbidities",
    "income_est_mod"
  )
)
```

```{r}
## Exploratory analysis of the dataset:
# Each figure is the share of the total count that falls in one category.
# Wrapped in local() so the helper and the total do not leak into the
# workspace used by the later chunks.
local({
  total <- sum(data$count)

  # which() discards NA comparisons, so a missing category value cannot turn
  # the whole share into NA.
  share <- function(is_member) {
    sum(data$count[which(is_member)]) / total
  }

  print(paste0("Total number of individuals: ", total))
  print(paste0("PCV13 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_only")))
  print(paste0("PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PPSV23_10yrs")))
  print(paste0("PCV13 and PPSV23 vaccine coverage: ",
               share(data$PCV_combined == "PCV13_PPSV23_10yrs")))
  print(paste0("Unknown PCV vaccine coverage: ",
               share(data$PCV_combined == "U")))
  print(paste0("Flu vaccine: ", share(data$flu_vacc == "TRUE")))
  # Label corrected from "Zooster" to "Zoster".
  print(paste0("Zoster vaccine: ", share(data$zoster_vacc == "TRUE")))
})
```

## Run the fourth logistic regression analysis looking at outcome: ICU/critical care outcome for Covid-19 versus severe respiratory outcome
```{r}
## Turn the categorical predictors into factors, then pick the reference
## levels: "W" for race (White) and "U" for the pneumococcal vaccine status.
col_names <- c(
  "patient_gender_code", "race_code", "income_est_mod", "PCV_combined"
)
data_comb[col_names] <- lapply(
  col_names,
  function(column) factor(data_comb[[column]])
)
data_comb[["race_code"]] <- relevel(data_comb[["race_code"]], ref = "W")
data_comb[["PCV_combined"]] <- relevel(data_comb[["PCV_combined"]], ref = "U")
```

```{r}
# Binomial logistic regression on the paired stratum counts.
# NOTE(review): with a two-column response glm() treats the first column as
# successes and the second as failures. count.x comes from data1 and count.y
# from data2 of the merge that built data_comb, so the fitted odds are for the
# data1 outcome -- confirm this is the intended direction of the odds ratios.
m4 <- glm(
  cbind(count.x, count.y) ~
    age + patient_gender_code + race_code + PCV_combined +
    flu_vacc + zoster_vacc + bmi_30_plus + comorbidities + income_est_mod,
  family = binomial(link = "logit"),
  data = data_comb
)
# summary(m4)
# Confidence intervals for the coefficients (still on the log-odds scale).
m4.ci <- confint(m4)
```

```{r}
# Combine the point estimates with their confidence limits and exponentiate,
# moving everything from the log-odds scale to the odds(-ratio) scale.
m4.table <- cbind(coef(m4), m4.ci)
colnames(m4.table) <- c("estimate", "lower", "upper")
m4.table <- exp(m4.table)
# View() needs an interactive session, which is not available while the
# document is being knitted; print the table into the report in that case.
if (interactive()) {
  View(m4.table)
} else {
  print(m4.table)
}
```



13 changes: 13 additions & 0 deletions PCV_analysis.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
Loading

0 comments on commit 9429510

Please sign in to comment.