title | author | date | output |
---|---|---|---|
Notebook_4_RG_early_timepoint |
Laura Wolbeck |
2023-12-03 |
html_document |
This script extracts the Radial glia clusters from the dataset "Early timepoints" (E 18.5 and preterm P0) from the previous paper "Significance of birth in the maintenance of quiescent neural stem cells" Clustering and annotation of the Radial glia cells is performed. Then, with the R package cacoa, the differentially expressed genes are estimated and Gene set enrichment analysis (GSEA) is performed.
#Load helper functions
source("../scRNA_helper.R")
#Load libraries
library(pagoda2)
library(magrittr)
library(ggplot2)
library(cacoa) #version 0.4.0
library(qs)
library(org.Mm.eg.db)
library(openxlsx)
library(ggrastr)
library(enrichplot)
Read in the conos object containing early timepoints data (E 18.5 and preterm P0) and annotation from the previous paper "Significance of birth in the maintenance of quiescent neural stem cells"
anno <- readRDS("anno.rds")
con <- readRDS("con.rds")
cms <- readRDS("cms.rds")
#get RG cluster names
RG_clusters <- c("RG", "Mki67+_RG", "Intermediate_progenitor_cells")
#get RG cells
RG_cells <- names(anno[anno %in% RG_clusters])
# keep only cells in cms that are in the RG cell vector
cms_RG <- cms %>% lapply(function(x) x[,colnames(x) %in% RG_cells])
saveRDS(cms_RG, "cms_RG.rds")
names = c("E18_5_1_",
"E18_5_2_",
"pre_P0_1_",
"pre_P0_2_")
con_RG <- quickConos(cms_RG,
names,
n.cores.p2=10,
n.cores.con=20, get.tsne = TRUE, alignment.strength=0.3)
con_RG <- con_RG$con
Rerun leiden clustering to find additional clusters
con_RG$findCommunities(method = leiden.community, resolution = 3, min.group.size = 1)
leiden <- con_RG$clusters$leiden$groups %>% factor
con_RG$clusters$leiden$groups <- leiden
saveRDS(con_RG, "con_RG.rds")
anno_RG <- getConosCluster(con_RG)
Check cells per sample
sapply(cms_RG, function(d) dim(d)[2])
sum(sapply(cms_RG, function(d) dim(d)[2]))
remove "cluster 7" cells
seven <- names(anno_RG[anno_RG %in% "7"])
cms_RG %<>% lapply(function(x) x[,!colnames(x) %in% seven])
Check cells per sample after removal
sapply(cms_RG, function(d) dim(d)[2])
sum(sapply(cms_RG, function(d) dim(d)[2]))
# cms of RG clusters with cluster 7 cells removed
saveRDS(cms_RG, "cms_RG2.rds")
con_RG <- quickConos(cms_RG2,
names,
n.cores.p2=10,
n.cores.con=20, get.tsne = F, alignment.strength= 0.3)
con_RG <- con_RG$con
saveRDS(con_RG, "con_RG2.rds")
Final clusters were partly formed by manual selection of cells based on marker gene expression.
condition <- setNames(c("fullterm", "fullterm", "preterm", "preterm"), names(con_RG$samples))
Create Cacoa object
cao <- Cacoa$new(con_RG, cell.groups = anno_RG, sample.groups=condition, n.cores = 20, ref.level = "fullterm", target.level = "preterm")
cao$estimateDEPerCellType(n.cores=50, min.cell.count = 10, min.cell.frac = 0.05)
#DEs not calculated for 2 cell group(s): Immature ependymal cells-2, RG_5
DEG_early_RG <- cao$test.results$de %>% lapply("[[", "res")
sort ascending by padj
DEG_early_RG %<>% lapply( function(x) x[order(x$padj),])
DEG_early_RG_top100 <- lapply(DEG_early_RG, function(x) x[c(1:100),])
write top 100 (by padj) to excel file
write.xlsx(x = DEG_early_RG_top100, file= "DEG_early_RG_top100.xlsx")
write to excel file, one sheet per cell type
require(openxlsx)
write.xlsx(DEG_early_RG, file = "DEG_early_RG.xlsx")
cao$estimateOntology(type="GSEA", org.db=org.Mm.eg.db)
cao$saveOntologyAsTable("early_RG_GSEA.tsv", name="GSEA")
write.xlsx(x = early_RG_GSEA, file= "early_RG_GSEA.xlsx")
qsave(cao, "cao_RG_early.qs")
#defining colours of clusters
colours <- c("#074770","#3232ff", "#AFB4DB", "#659AD2", "#FF9C00","#0067C0", "#32ff32", "#ffff32", "#ffaf32") %>% setNames(c( "Lateral-ventral_RG" , "Dorsal_RG" , "Dorsal-lateral_RG" , "Primed_active_RG" , "Immature_E_cell" , "Primed_active_RG_dorsal", "NPC" , "Mki67_RG" , "Pre_E" ))
plot <- con$plotGraph(groups=anno, label="") + scale_colour_manual(values=colours)+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
plot
rasterize(plot, layers='Point', dpi=1000)
ggsave("UMAP_RG.pdf", width=7, height=4.3)
genes <- c("Rlbp1", "Crym", "Emx1", "Ascl1")
plots <- lapply(genes, function(gene) {
con$plotGraph(gene=gene, title=gene, size=0.6) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border=element_blank())
})
plot <- cowplot::plot_grid(plotlist = plots)
rasterize(plot, layers='Point', dpi=1000)
ggsave("UMAP_RG_gene_expression.pdf", width=12, height = 9, units = "cm")
enrichplot::dotplot(cao$test.results$GSEA$res$'Dorsal_RG'$BP,showCategory=c("regulation of gliogenesis",
"renal tubule morphogenesis",
"camera-type eye morphogenesis",
"eye morphogenesis",
"ureteric bud morphogenesis",
"mesonephric tubule morphogenesis",
"branching involved in ureteric bud morphogenesis",
"cardiac septum morphogenesis",
"embryonic morphogenesis",
"ear morphogenesis",
"pulmonary valve morphogenesis",
"embryonic organ morphogenesis",
"retina morphogenesis in camera-type eye",
"lens morphogenesis in camera-type eye",
"negative regulation of blood vessel morphogenesis"))+ theme(axis.text.y = element_text(size = 10))
ggsave("GSEA_Dorsal_RG.pdf", width=5.5)