-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Completed migration of Hippo and Snyder scripts
Moved files from https://github.com/leekgroup/derSupplement Updated timing report (less plots, better plots) Updated venn report to show information for both Snyder and Hippo data sets Updated index to reflect the files this repo describes
- Loading branch information
1 parent
5d3caa0
commit 1342860
Showing
49 changed files
with
16,241 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
GenomicState.Hsapiens.UCSC.hg19.knownGene.rda | ||
figure-hippo-reanalysis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
### | ||
# qsub -V -pe local 24 -l jabba,mf=80G,h_vmem=16G,h_stack=256M -cwd -b y R CMD BATCH --no-save feature_counts.R | ||
source("/home/epi/ajaffe/Lieber/lieber_functions_aj.R") | ||
|
||
library(Rsubread) | ||
|
||
## stem | ||
xx=load("/home/epi/ajaffe/Lieber/Projects/RNAseq/UCSD_samples/UCSD_stemcell_pheno.rda") | ||
pdStem = pd | ||
pdStem$bamFile = paste0("/dcs01/ajaffe/UCSC_Epigenome/RNAseq/TopHat/", | ||
pdStem$sample, "_out/accepted_hits.bam") | ||
|
||
## hippo | ||
load("/home/epi/ajaffe/Lieber/Projects/RNAseq/HippoPublic/sra_phenotype_file.rda") | ||
pdHippo = sra | ||
pdHippo = pdHippo[-grep("ED", pdHippo$SampleID),] | ||
|
||
## snyder | ||
bam = list.files("/dcs01/ajaffe/Snyder/RNAseq/TopHat", | ||
pattern="accepted_hits.bam$", recur=TRUE,full.names=TRUE) | ||
names(bam) = list.files("/dcs01/ajaffe/Snyder/RNAseq/TopHat", | ||
pattern="accepted_hits.bam$", recur=TRUE) | ||
|
||
pheno = data.frame(sampleName = c(pdStem$run, pdHippo$SampleID, ss(names(bam), "_")), | ||
Study = c(rep("Stem", nrow(pdStem)), rep("Hippo", nrow(pdHippo)), | ||
rep("Snyder", length(bam))), | ||
bamFile = c(pdStem$bamFile, pdHippo$bamFile, bam), | ||
stringsAsFactors=FALSE) | ||
libSize = getTotalMapped(pheno$bamFile,mc.cores=12) | ||
pheno$totalMapped = libSize$totalMapped | ||
pheno$mitoMapped = libSize$mitoMapped | ||
|
||
## count genes | ||
geneCounts = featureCounts(pheno$bamFile, annot.inbuilt="hg19", | ||
useMetaFeatures = TRUE, nthreads=24) | ||
exonCounts = featureCounts(pheno$bamFile, annot.inbuilt="hg19", | ||
useMetaFeatures = FALSE,nthreads=24) | ||
save(geneCounts,exonCounts, pheno, file="featureCounts_output.rda") | ||
|
||
##### | ||
load("featureCounts_output.rda") | ||
|
||
pheno$geneAlign = as.numeric(geneCounts$stat[1,-1] / colSums(geneCounts$stat[,-1])) | ||
pheno$geneAmbig = as.numeric(geneCounts$stat[2,-1] / colSums(geneCounts$stat[,-1])) | ||
pheno$exonAlign = as.numeric(exonCounts$stat[1,-1] / colSums(exonCounts$stat[,-1])) | ||
pheno$exonAmbig = as.numeric(exonCounts$stat[2,-1] / colSums(exonCounts$stat[,-1])) | ||
|
||
signif(tapply(pheno$geneAlign, pheno$Study, mean),3) | ||
signif(tapply(pheno$geneAmbig, pheno$Study, mean),3) | ||
signif(tapply(pheno$exonAlign, pheno$Study, mean),3) | ||
signif(tapply(pheno$exonAmbig, pheno$Study, mean),3) | ||
|
||
### mito map | ||
signif(tapply(pheno$mitoMapped/(pheno$mitoMapped+pheno$totalMapped), pheno$Study, mean),3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
library('ggplot2') | ||
library('getopt') | ||
|
||
## Specify parameters | ||
spec <- matrix(c( | ||
'experiment', 'e', 1, 'character', 'Experiment', | ||
'run', 'r', 1, 'character', 'run name', | ||
'help' , 'h', 0, 'logical', 'Display help' | ||
), byrow=TRUE, ncol=5) | ||
opt <- getopt(spec) | ||
|
||
## if help was asked for print a friendly message | ||
## and exit with a non-zero error code | ||
if (!is.null(opt$help)) { | ||
cat(getopt(spec, usage=TRUE)) | ||
q(status=1) | ||
} | ||
|
||
## Check experiment input | ||
stopifnot(opt$experiment %in% c('snyder', 'hippo')) | ||
|
||
chrs <- paste0('chr', c(1:22, 'X', 'Y')) | ||
study <- opt$experiment | ||
run <- opt$run | ||
|
||
timediff <- lapply(chrs, function(chr) { | ||
info <- tryCatch(system(paste0('grep permutation *', study, '*', run, '*', chr, '.e*'), intern = TRUE), warning = function(w) { 'no data'}) | ||
if(info[1] == 'no data') { | ||
info <- tryCatch(system(paste0('grep permutation ', file.path(study, 'derAnalysis', run, chr, 'logs'), '/*', chr, '.e*'), intern = TRUE), warning = function(w) { 'no data'}) | ||
} | ||
if(info[1] == 'no data') return(NULL) | ||
|
||
time <- strptime(gsub('([[:space:]]*calculate.*$)', '', info), | ||
format = '%Y-%m-%d %H:%M:%S') | ||
|
||
idx <- seq_len(length(info) - 1) | ||
difftime(time[idx + 1], time[idx], units = 'mins') | ||
}) | ||
names(timediff) <- chrs | ||
|
||
|
||
## Organize time information | ||
chrnum <- gsub('chr', '', chrs) | ||
df <- data.frame(chr = factor(chrnum, levels = chrnum), mean = sapply(timediff, mean), sd = sapply(timediff, sd)) | ||
|
||
## Group by number of rounds per permutation given the number of chunks & cores used | ||
if(!file.exists(file.path(study, 'derAnalysis', run, 'nChunks.Rdata'))) { | ||
nChunks <- sapply(chrs, function(chr) { | ||
if(!file.exists(file.path(study, 'derAnalysis', run, chr, 'coveragePrep.Rdata'))) | ||
return(NA) | ||
load(file.path(study, 'derAnalysis', run, chr, 'coveragePrep.Rdata')) | ||
max(prep$mclapply) | ||
}) | ||
save(nChunks, file = file.path(study, 'derAnalysis', run, 'nChunks.Rdata')) | ||
} else { | ||
load(file.path(study, 'derAnalysis', run, 'nChunks.Rdata')) | ||
} | ||
|
||
if (study == 'brainspan') { | ||
nCores <- c(40, 32, 27, rep(20, 15), 29, rep(20, 4), 2) | ||
} else if (study == 'snyder') { | ||
nCores <- rep(4, 24) | ||
} else if (study == 'hippo') { | ||
nCores <- rep(2, 24) | ||
} | ||
names(nCores) <- chrs | ||
|
||
df$n <- sapply(timediff, length) | ||
df$se <- df$sd / sqrt(df$n) | ||
df$nChunks <- nChunks | ||
df$nCores <- nCores | ||
df$nRound <- factor(ceiling(nChunks / nCores)) | ||
|
||
|
||
## Print info | ||
rownames(df) <- NULL | ||
print(df) | ||
|
||
|
||
## Make plot | ||
pdf(file.path(study, 'derAnalysis', run, paste0('permuteTime-', study, '-', run, '.pdf'))) | ||
ggplot(df, aes(x = chr, y = mean, color = nRound)) + geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.1) + geom_line() + geom_point() + ylab('Time per permutation (minutes)\nMean +- SE') + xlab('Chromosome') + ggtitle(paste('Time info for', study, run)) + scale_y_continuous(breaks=seq(0, ceiling(max(df$mean + df$se, na.rm = TRUE)), 1)) | ||
dev.off() | ||
|
||
print('Expected total number of days per chr and days remaining') | ||
days <- data.frame(chr = chrnum, total = round(df$mean * 1001 / 60 / 24, 1), remaining = round(df$mean * (1001 - df$n - 2 ) / 60 / 24, 1)) | ||
rownames(days) <- NULL | ||
print(days) | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Setup | ||
library("GenomicRanges") | ||
library("rtracklayer") | ||
|
||
load('/home/epi/ajaffe/Lieber/Projects/RNAseq/derannotator/rdas/GenomicState.Hsapiens.ensembl.GRCh37.p11.rda') | ||
load('/home/epi/ajaffe/Lieber/Projects/RNAseq/derannotator/rdas/GenomicState.Hsapiens.UCSC.hg19.knownGene.rda') | ||
|
||
makeGFF <- function(exonicParts, file) { | ||
message(paste(Sys.time(), "makeGFF: Saving", file)) | ||
export.gff2(exonicParts, file) | ||
} | ||
|
||
makeGFF(GenomicState.Hsapiens.ensembl.GRCh37.p11$fullGenome[ GenomicState.Hsapiens.ensembl.GRCh37.p11$fullGenome$theRegion == "exon"], "GenomicState.Hsapiens.ensembl.GRCh37.p11.exons.gff") | ||
makeGFF(GenomicState.Hsapiens.UCSC.hg19.knownGene$fullGenome[ GenomicState.Hsapiens.UCSC.hg19.knownGene$fullGenome$theRegion == "exon"], "GenomicState.Hsapiens.UCSC.hg19.knownGene.exons.gff") | ||
|
||
proc.time() | ||
sessionInfo() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash | ||
|
||
#$ -cwd | ||
#$ -l jabba,mem_free=10G,h_vmem=50G,h_fsize=40G | ||
#$ -N exonsGFF | ||
#$ -m e | ||
|
||
echo "**** Job starts ****" | ||
date | ||
|
||
## Create GFF files | ||
Rscript createGFF.R | ||
|
||
### Done | ||
echo "**** Job ends ****" | ||
date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
derAnalysis | ||
coverageToExon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
logs | ||
*Rdata | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
## Setup | ||
library('TxDb.Hsapiens.UCSC.hg19.knownGene') | ||
library('derfinder') | ||
library('Rsamtools') | ||
library('GenomicAlignments') | ||
library('parallel') | ||
options(mc.cores=24) | ||
|
||
## Exons by gene | ||
ex <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, by = 'gene') | ||
|
||
## Make bamFileList | ||
files <- rawFiles(datadir='/dcs01/ajaffe/Hippo/TopHat', samplepatt="out$", | ||
fileterm="accepted_hits.bam") | ||
names(files) <- gsub('_out', '', names(files)) | ||
bai <- paste0(files, ".bai") | ||
bList <- BamFileList(files, bai) | ||
|
||
## Compute the overlaps | ||
message(paste(Sys.time(), "summarizeOverlaps: Running summarizeOverlaps()")) | ||
summOv <- summarizeOverlaps(ex, bList, mode="Union", | ||
singleEnd=TRUE, ignore.strand=TRUE) | ||
|
||
## Finish | ||
message(paste(Sys.time(), "summarizeOverlaps: Saving summOverlaps")) | ||
save(summOv, file="summOv.Rdata") | ||
|
||
proc.time() | ||
options(width = 120) | ||
devtools::session_info() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash | ||
#$ -cwd | ||
#$ -m e | ||
#$ -l mem_free=3G,h_vmem=15G,h_fsize=30G | ||
#$ -pe local 24 | ||
#$ -N summOv-hippo-rerun | ||
|
||
echo "**** Job starts ****" | ||
date | ||
|
||
mkdir -p /dcs01/ajaffe/Brain/derRuns/derSoftware/hippo/counts-gene/logs | ||
|
||
## Summarize overlaps | ||
module load R/3.2.x | ||
Rscript counts-gene.R | ||
|
||
# Move log files into the logs directory | ||
mv /dcs01/ajaffe/Brain/derRuns/derSoftware/hippo/counts-gene/summOv-hippo-rerun.* /dcs01/ajaffe/Brain/derRuns/derSoftware/hippo/counts-gene/logs/ | ||
|
||
### Done | ||
echo "**** Job ends ****" | ||
date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
*.e* | ||
*.o* | ||
*Rdata | ||
|
Oops, something went wrong.