forked from ding-lab/VirusScan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_split_cdhit.pl
75 lines (68 loc) · 1.64 KB
/
check_split_cdhit.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/perl -w
use strict;
# Help text printed (via die) when the script is invoked incorrectly.
my $usage='
perl script <sample folder>
<sample folder> = full path of the folder holding files for a sample
';

# Exactly one command-line argument is accepted: the sample folder.
if (@ARGV != 1) {
    die $usage;
}
my $dir = $ARGV[0];

# The value returned by check_split_output() is used directly as the
# process exit status.
exit(check_split_output($dir));
###########################################################################
# Count the FASTA records in the file at $path, i.e. the lines that begin
# with '>'.
#
# Returns the number of records, or undef when the file cannot be opened.
sub _count_fasta_records {
    my ($path) = @_;

    # Three-argument open: the file name can never be read as a mode or pipe.
    open(my $fh, '<', $path) or return;

    my $count = 0;
    while (my $line = <$fh>) {
        $count++ if $line =~ /^>/;
    }
    close $fh;

    return $count;
}

# Compare the number of sequences in the "*.cdhit_out" files of a sample
# folder with the number of sequences in the "*.fa" files found inside its
# "*.cdhit_out_RepeatMasker" sub-directories.
#
# Parameters:
#   $dir - path of the sample folder to inspect.
#
# Returns:
#   1  when both totals are equal.
#   10 when the folder, a sub-directory or one of the files cannot be
#      opened, or when the totals differ. When the totals differ, every
#      "*.cdhit_out_RepeatMasker" entry is deleted and re-created as an
#      empty directory before returning.
#
# NOTE(review): a folder containing neither kind of file compares 0 == 0
# and is therefore reported as 1 -- confirm that this is intended.
sub check_split_output {
    my ( $dir ) = @_;
    my $tot_cdhit_seq = 0;
    my $tot_seq = 0;
    my @repeatmasker_dirs;

    # Read the listing once and release the handle right away; opendir
    # handles must be released with closedir (close does not do it).
    opendir(my $dh, $dir) or return 10;
    my @names = readdir $dh;
    closedir $dh;

    foreach my $name (@names) {
        my $full_path = $dir."/".$name;

        if ($name =~ /\.cdhit_out$/) {
            my $count = _count_fasta_records($full_path);
            return 10 unless defined $count;
            $tot_cdhit_seq += $count;
        }
        elsif ($name =~ /\.cdhit_out_RepeatMasker$/) { # RepeatMasker directory
            # Remembered so it can be reset below if the totals disagree.
            push @repeatmasker_dirs, $full_path;

            opendir(my $sub_dh, $full_path) or return 10;
            my @files = readdir $sub_dh;
            closedir $sub_dh;

            foreach my $file (@files) {
                next unless $file =~ /\.fa$/;
                my $count = _count_fasta_records($full_path."/".$file);
                return 10 unless defined $count;
                $tot_seq += $count;
            }
        }
    }

    return 1 if $tot_cdhit_seq == $tot_seq;

    # Totals differ: wipe each RepeatMasker directory and re-create it empty.
    # This is done without a shell so that paths containing whitespace or
    # shell metacharacters are handled correctly.
    require File::Path;
    foreach my $full_path (@repeatmasker_dirs) {
        File::Path::remove_tree($full_path, { error => \my $errors });
        foreach my $diag (@{ $errors || [] }) {
            my ($file, $message) = %{$diag};
            $file = $full_path if $file eq '';
            warn "cannot remove $file: $message\n";
        }
        mkdir($full_path) or warn "cannot create $full_path: $!\n";
    }

    return 10;
}