-
Notifications
You must be signed in to change notification settings - Fork 2
/
filter_reads_in_cdna.pl
87 lines (70 loc) · 1.31 KB
/
filter_reads_in_cdna.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
## 12-09-2016 ##
#!/usr/bin/perl
use strict;
(my $usage = <<OUT) =~ s/\t+//g;
This script will parse mapped read
OUT
die $usage unless @ARGV == 3;
my($f_map_in,$f_fq_in,$f_fq_out)=@ARGV;
my %num=();
my $nmax=0;
open(IN,"<$f_map_in");
open(IN_fq,"<$f_fq_in");
open(OUT,">$f_fq_out");
while(<IN>)
{
my $line=$_;
chomp($line);
if($line=~/^\@SQ/) { next; }
my @temp=split("\t",$line);
my $cigar=$temp[5];
if($cigar=~/(\d+)M/) {
$num{$1}++; }
}
close IN;
foreach my $n (sort { $num{$b} <=> $num{$a} } keys %num)
{
$nmax=$n;
last;
}
#print $nmax,"\n";
my %filter_id=();
open(IN,"<$f_map_in");
while(<IN>)
{
my $line=$_;
chomp($line);
if($line=~/^\@SQ/) { next; }
my @temp=split("\t",$line);
my $cigar=$temp[5];
my $flag=$temp[1];
my $nm=$temp[12];
if($cigar=~/^$nmax\M/ && $nm=~/NM:i:0/)
{
my $id=$temp[0];
$filter_id{$id}=1;
}
}
close IN;
my $write_up=1;
my $cc=0;
while(<IN_fq>)
{
my $line=$_;
chomp($line);
my $int4=$cc-int($cc/4)*4;
if($line=~/^\@/ && $int4==0)
{
my $id=$line;
$id=~s/^\@//g;
if(defined $filter_id{$id}) { $write_up=0;}
else { $write_up=1; print OUT $line,"\n"; }
}
else
{
if($write_up==1) { print OUT $line,"\n"; }
}
$cc++;
}
close IN_fq;
close OUT;