-
Notifications
You must be signed in to change notification settings - Fork 2
/
work_log_GBM
74 lines (40 loc) · 9.6 KB
/
work_log_GBM
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
## 1. create directory
rm -rf /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM
mkdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM
mkdir /gscmnt/gc2533/dinglab/scao/cptac-neo/worklog
## 2. get exome bam path
perl -e '$f="/gscmnt/gc2518/dinglab/scao/home/git/CPTAC3.catalog/BamMap/MGI.BamMap.dat"; $f_s="/gscmnt/gc2517/dinglab/scao/cptac-neo/data/v1.1/Case_ID/GBM/Case_ID_GBM.tsv"; foreach $l (`cat $f_s`) { $ltr=$l; chomp($ltr); if($ltr=~/^CASE/) { next; } else { @t=split("\t",$ltr); $slist{$t[0]}=1; }} foreach $l (`cat $f`) { $ltr=$l; chomp($ltr); if($ltr=~/^#/) { next; } else { @t=split("\t",$ltr); $sn=$t[1]; if(defined $slist{$sn} && $t[3] eq "WXS" && $t[4] eq "blood_normal") { $bam{$sn}=$t[5]; }} } print "Sample","\t","WXS_normal_bam_path","\n"; foreach $s (sort keys %bam) { print $s,"\t",$bam{$s},"\n"; }' > /gscmnt/gc2533/dinglab/scao/cptac-neo/worklog/GBM.WXS.normal.bam.tsv
#### 3. softlink exome normal bam
perl -e '$dir="/gscmnt/gc2533/dinglab/scao/cptac-neo/GBM"; $f_bam="/gscmnt/gc2533/dinglab/scao/cptac-neo/worklog/GBM.WXS.normal.bam.tsv"; foreach $l (`cat $f_bam`) { $ltr=$l; chomp($ltr); if($ltr=~/^Sample/) { next; } @temp=split("\t",$ltr); $sn=$temp[0]; $sample_to_bam{$sn}=$temp[1]; $cint=int($cc/25); $dirx=$dir."/work".$cint; if (!-d $dirx) { `mkdir $dirx`}; $dir_o=$dirx."/".$sn; $sample_2_dir{$sn}=$dir_o."/".$sn; if (! -d $dir_o) { `mkdir $dir_o`; $f_o=$dir_o."/".$sn.".bam"; `ln -s $temp[1] $f_o`; } $cc++; }'
## 4. generate vcf ##
## 1 12795514 C A PRAMEF1 p.Q315K Somatic
perl -e '$dir="/gscmnt/gc2533/dinglab/scao/cptac-neo/GBM"; $f_maf="/gscmnt/gc2517/dinglab/scao/cptac-neo/data/v1.1/PanCan_Union_Maf_Broad_WashU_v1.1.maf"; $f_bam="/gscmnt/gc2533/dinglab/scao/cptac-neo/worklog/GBM.WXS.normal.bam.tsv"; foreach $l (`cat $f_bam`) { $ltr=$l; chomp($ltr); @temp=split("\t",$ltr); if($ltr=~/^Sample/) { next; } $sn=$temp[0]; $sample_to_bam{$sn}=$temp[1]; $cint=int($cc/25); $dirx=$dir."/work".$cint; if (!-d $dirx) { `mkdir $dirx`}; $dir_o=$dirx."/".$sn; $sample_2_dir{$sn}=$dir_o; $cc++; $f_indel=$dir_o."/".$sn.".indel.vcf"; $f_snp=$dir_o."/".$sn.".snp.vcf"; `rm $f_indel`; `rm $f_snp`; print $sn,"\t",$temp[1],"\n"; } print "reading maf file\n"; foreach $l (`cat $f_maf`) { $ltr=$l; chomp($ltr); if($ltr=~/^Hugo/) { next; } @temp=split("\t",$ltr); $sn=$temp[12]; $sn=~s/_T//g; print $sn,"\n"; $chr=$temp[3]; $chr=~s/chr//g; if(defined $sample_2_dir{$sn}) { if($temp[7] eq "Frame_Shift_Del" || $temp[7] eq "Frame_Shift_Ins" || $temp[7] eq "In_Frame_Del" || $temp[7] eq "In_Frame_Ins" || $temp[7] eq "Missense_Mutation" || $temp[7] eq "Nonstop_Mutation") { $f_out_indel=$sample_2_dir{$sn}."/".$sn.".indel.vcf"; open(OUT2,">>$f_out_indel"); $f_out_snp=$sample_2_dir{$sn}."/".$sn.".snp.vcf"; open(OUT1,">>$f_out_snp"); if($temp[8] eq "SNP") { print OUT1 $chr,"\t",$temp[4],"\t",$temp[9],"\t",$temp[11],"\t",$temp[0],"\t",$temp[26],"\t","Somatic","\n";} if($temp[8] eq "DEL" || $temp[8] eq "INS") { print OUT2 $chr,"\t",$temp[4],"\t",$temp[9],"\t",$temp[11],"\t",$temp[0],"\t",$temp[26],"\t","Somatic","\n"; }}}}'
## 5. generate log directory
mkdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log
## 6. run neoscan pipeline
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work0 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 3
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work1 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 3
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work2 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 3
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work3 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 3
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work0 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 1
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work1 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 1
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work2 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 1
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work3 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 1
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work0 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 2
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work1 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 2
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work2 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 2
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work3 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 2
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work0 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 4
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work1 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 4
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work2 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 4
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work3 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 4
### 5
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work0 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 5
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work1 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 5
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work2 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 5
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work3 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 5
### 6
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work0 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 6
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work1 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 6
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work2 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 6
perl neoscan.pl --rdir /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM/work3 --log /gscmnt/gc2533/dinglab/scao/cptac-neo/GBM.log --bam 1 --bed /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29/proteome.bed --rna 0 --refdir /gscmnt/gc2518/dinglab/scao/db/refseq_hg38_june29 --step 6