-
Notifications
You must be signed in to change notification settings - Fork 2
/
generate_mut_peptide_indel.pl
104 lines (88 loc) · 2.13 KB
/
generate_mut_peptide_indel.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/perl
use strict;
(my $usage = <<OUT) =~ s/\t+//g;
This script will generate the peptide database with mutations
perl generate_mut_peptide.pl f_bed f_snv_wt f_snv_mut f_out
OUT
die $usage unless @ARGV == 4;
my($f_bed,$f_snv_wt,$f_snv_mut,$f_out)=@ARGV;
open(SNVWT,"<$f_snv_wt");
#open(INDELWT,"<$f_indel_wt");
open(BED,"<$f_bed");
open(SNVMUT,"<$f_snv_mut");
#open(INDELMUT,"<$f_indel_mut");
open(OUT,">$f_out");
my %wt_seq=();
my %indel_pos=();
my $rd_id;
my %strandness=();
while(<BED>)
{
my $l=$_;
chomp($l);
my @temp=split("\t",$l);
$strandness{$temp[3]}=$temp[5];
}
while(<SNVWT>)
{
my $line=$_;
chomp($line);
if($line=~/^\>/) { $rd_id=$line; $rd_id=~s/^>//g; }
else { $wt_seq{$rd_id}=$line; }
}
close SNVMT;
while(<SNVMUT>)
{
my $line=$_;
chomp($line);
if($line=~/^\>/) { $rd_id=$line; $rd_id=~s/^>//g; }
else {
my @temp=split("-",$rd_id); my $id=$temp[0];
#print $id,"\n";
for(my $i=0;$i<length($line);$i++)
{
my $r1=substr($line,$i,1);
my $r2=substr($wt_seq{$id},$i,1);
#print $r1,"\n";
#print $r2,"\n";
if($r1 ne $r2) { $indel_pos{$rd_id}{$i}=1; }
}
}
}
close SNVMUT;
open(SNVMUT,"<$f_snv_mut");
while(<SNVMUT>)
{
my $line=$_;
chomp($line);
if($line=~/^\>/) { $rd_id=$line; $rd_id=~s/^>//g; }
else {
my $pep=$line;
my @temp=split("chr",$rd_id);
my $n_temp=scalar @temp;
my $ind=0;
foreach my $p (sort {$a<=>$b} keys %{$indel_pos{$rd_id}})
{
#print $p,"\n";
my $start=$p-12;
my $end=$p+12;
if($start<0) { $start=0; }
if($end>length($pep)-1) { $end=length($pep)-1; }
my $pepstr=substr($pep,$start,$end-$start);
my $np=$temp[0]; $np=~s/-VAR://g;
#print $np,"\t",$strandness{$np},"\n";
#<STDIN>;
if($strandness{$np} eq "-")
{
print OUT ">",$temp[0],"chr",$temp[$n_temp-1-$ind],"\n";
}
else
{
print OUT ">",$temp[0],"chr",$temp[$ind+1],"\n";
}
$ind++;
print OUT $pepstr,"\n";
}
}
}
close SNVMUT;