Skip to content

Commit

Permalink
upload
Browse files Browse the repository at this point in the history
  • Loading branch information
wk8910 committed Sep 27, 2018
1 parent aa69c6b commit 2039d15
Show file tree
Hide file tree
Showing 31 changed files with 22,738 additions and 1 deletion.
124 changes: 124 additions & 0 deletions 00.basic_scripts/gff2cds.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#! /usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Seq;

sub translate_nucl{
my $seq=shift;
my $seq_obj=Bio::Seq->new(-seq=>$seq,-alphabet=>'dna');
my $pro=$seq_obj->translate;
$pro=$pro->seq;
return($pro);
}

my ($gff,$genome_fa,$cds)=@ARGV;
die "Usage: $0 <gff file> <genome file> <output>\n" if(@ARGV<3);

my %gff;
my %len;
open(I,"< $gff");
my $no=0;
while(<I>){
chomp;
next if(/^#/);
next if(/^\s*$/);
my @a=split(/\s+/);
next unless($a[2] eq "CDS");
$no++;
my ($chr,$source,$start,$end,$strand,$phase,$name)=($a[0],$a[1],$a[3],$a[4],$a[6],$a[7],$a[8]);
# $chr=~s/chr//g;
$name=~/Parent=([^;]+)/;
$name=$1;
if(!exists $len{$name}){
$len{$name}{chr}=$chr;
$len{$name}{source}=$source;
$len{$name}{start}=$start;
$len{$name}{end}=$end;
$len{$name}{strand}=$strand;
}
else {
if($start < $len{$name}{start}){
$len{$name}{start}=$start;
}
if($end > $len{$name}{end}){
$len{$name}{end} = $end;
}
}
$len{$name}{cds}.=$_."\n";
$gff{$chr}{$name}{$no}{start}=$start;
$gff{$chr}{$name}{$no}{end}=$end;
$gff{$chr}{$name}{$no}{strand}=$strand;
$gff{$chr}{$name}{$no}{phase}=$phase;
}
close I;

my $fa=Bio::SeqIO->new(-file=>$genome_fa,-format=>'fasta');

my %whitelist;
open(O,"> $cds.cds");
open(E,"> $cds.pep");
while(my $seq=$fa->next_seq){
my $chr=$seq->id;
my $seq=$seq->seq;
#print ">$id\n$seq\n";
next unless(exists $gff{$chr});
foreach my $name(keys %{$gff{$chr}}){
my $strand="NA";
my $line="";
my $light=0;
foreach my $no(sort { $gff{$chr}{$name}{$a}{start} <=> $gff{$chr}{$name}{$b}{start} } keys %{$gff{$chr}{$name}}){
if($strand eq "NA"){
$strand=$gff{$chr}{$name}{$no}{strand};
}
my $start=$gff{$chr}{$name}{$no}{start};
my $end=$gff{$chr}{$name}{$no}{end};
my $len=$end-$start+1;
my $subline=substr($seq,$start-1,$len);
$line.=$subline;
}
if($strand eq "+"){
my $pep=translate_nucl($line);
$pep=~/^(.*).$/;
my $stop_test=$1;
next if($stop_test=~/\*/);
$light=1;
print O ">$name\n$line\n";
print E ">$name\n$pep\n";
}
elsif($strand eq "-"){
$line=reverse($line);
$line=~tr/ATCGatcg/TAGCtagc/;
my $pep=translate_nucl($line);
$pep=~/^(.*).$/;
my $stop_test=$1;
next if($stop_test=~/\*/);
$light=1;
print O ">$name\n$line\n";
print E ">$name\n$pep\n";
}
if($light==1){
$whitelist{$name}=1;
print STDERR "$chr\t$name\n";
}
}
}
close O;
close E;

open O,"> $cds.noStopCodon.gff";
foreach my $name(sort keys %whitelist){
my $chr=$len{$name}{chr};
my $source=$len{$name}{source};
my $start=$len{$name}{start};
my $end=$len{$name}{end};
my $strand=$len{$name}{strand};
my @gene_line=($chr,$source,"gene",$start,$end,".",$strand,".","ID=$name;Parent=$name;");
my $gene_line=join "\t",@gene_line;
print O "$gene_line\n";
my @mrna_line=($chr,$source,"mRNA",$start,$end,".",$strand,".","ID=$name;Parent=$name;");
my $mrna_line=join "\t",@mrna_line;
print O "$mrna_line\n";
print O "$len{$name}{cds}";
}
close O;
24 changes: 24 additions & 0 deletions 00.basic_scripts/translateCDS2Protein.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#! /usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;
use Bio::Seq;

my $file=shift;

my $fa=Bio::SeqIO->new(-file=>$file,-format=>'fasta');

while(my $seq_obj=$fa->next_seq){
my $id=$seq_obj->id;
my $seq=$seq_obj->seq;
$seq=&translate_nucl($seq);
print ">$id\n$seq\n";
}

sub translate_nucl{
my $seq=shift;
my $seq_obj=Bio::Seq->new(-seq=>$seq,-alphabet=>'dna');
my $pro=$seq_obj->translate;
$pro=$pro->seq;
return($pro);
}
30 changes: 30 additions & 0 deletions 01.mpest/01.generate_control_file.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#! /usr/bin/env perl
use strict;
use warnings;

# my $all_tree="all.gene_tree";
my $all_tree=shift;
# my $out="control_file";

open I,"< $all_tree";
my %name;
my $count=0;
while(<I>){
chomp;
while(/([^(),]+):/g){
my $id=$1;
next if(length($id) == 0);
$name{$id}=1;
}
$count++;
}
close I;
my $species_number=keys %name;

# open O,"> $out";
print "$all_tree\n0\n3141592653589793\n20\n$count\t$species_number\n";
foreach my $id(sort keys %name){
print "$id\t1\t$id\n";
}
print "0\n";
# close O;
39 changes: 39 additions & 0 deletions 01.mpest/02.generate_bootstrap_input.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#! /usr/bin/env perl
use strict;
use warnings;

my $dir="genes";
my $outdir="bootstrap";
`mkdir $outdir` if(!-e $outdir);

my @genes=<$dir/RAxML_bootstrap.*.phb>;

my $bootstrap_number=100;

my %genes;
my $sn=0;
foreach my $genes(@genes){
open I,"< $genes";
my $tmp=0;
while(<I>){
chomp;
$genes{$sn}{$tmp}=$_;
$tmp++;
}
close I;
$sn++;
}

for(my $i=0;$i<$bootstrap_number;$i++){
`mkdir $outdir/$i`;
open O,"> $outdir/$i/all.tre";
for(my $i=0;$i<$sn;$i++){
my $select=int(rand($sn));
my $tmp_num=keys %{$genes{$select}};
my $tmp=int(rand($tmp_num));
# print "$select\t$tmp\n";
my $tree=$genes{$select}{$tmp};
print O "$tree\n";
}
close O;
}
14 changes: 14 additions & 0 deletions 01.mpest/03.run_bootstrap.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#! /usr/bin/env perl
use strict;
use warnings;

my $dir="bootstrap";

my @bootstrap=<$dir/*>;

open O,"> $0.sh";
foreach my $bootstrap(@bootstrap){
print O "perl 01.generate_control_file.pl $bootstrap/all.tre > $bootstrap/control_file; ~/software/MP-EST/mpest_1.5/src/mpest $bootstrap/control_file\n";
}
close O;

13 changes: 13 additions & 0 deletions 01.mpest/04.convertAndCollect.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#! /usr/bin/env perl
use strict;
use warnings;

my $dir="bootstrap";

my @file=<$dir/*/all.tre.tre>;

foreach my $file(@file){
`perl convertNEXUS2newick.pl $file`;
}

`cat $dir/*/all.tre.tre.nwk > bootstrap.tre`;
18 changes: 18 additions & 0 deletions 02.count_tpm/01.count.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#! /usr/bin/env perl
use strict;
use warnings;

my $bamdir="bam";
my $outdir="count";
`mkdir $outdir` if(!-e $outdir);
my @bam=<$bamdir/*.bam>;
# my $bed="regions.bed";
my $tool="scripts/sam2reads_count.pl";

open O,"> $0.sh";
foreach my $bam(@bam){
$bam=~/([^\/]+)$/;
my $file=$1;
print O "samtools view $bam | perl $tool > $outdir/$file.count\n";
}
close O;
86 changes: 86 additions & 0 deletions 02.count_tpm/02.generate_tpm.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#! /usr/bin/env perl
use strict;
use warnings;

my $dir="count";
my $len="cds.len";
my $rpkm_out="$0.tpm";
my $count_out="$0.count";

my %len;
open I,"< $len";
while (<I>) {
chomp;
my @a=split(/\s+/);
my ($id,$len,$e_len)=@a;
$len{$id}=$e_len;
}
close I;

my @txt=<$dir/*.count>;
my %count;
my %rpkm;
my @sample_id;
foreach my $txt(@txt){
$txt=~/\/([^\/]+).count/;
my $sample_id=$1;
push @sample_id,$sample_id;
my $total=0;
open I,"< $txt";
while (<I>) {
chomp;
my @a=split(/\s+/);
my ($id,$count)=@a;
next if(!exists$len{$id});
my $len=$len{$id};
if($len==0){
#print STDERR "$id\n";
next;
}
my $rate=$count/$len;
$total+=$rate;
$count{$sample_id}{$id}=$count;
}
foreach my $id(keys %len){
next if(!exists$len{$id});
my $len=$len{$id};
next if($len==0);
my $count=0;
if(exists $count{$sample_id}{$id}){
$count=$count{$sample_id}{$id};
}
my $rate=$count/$len;
# my $rpkm=($count*1e9)/($total*$len);
my $tpm=($rate/$total)*1e6;
$rpkm{$id}{$sample_id}{rpkm}=$tpm;
$rpkm{$id}{$sample_id}{count}=$count;
}
close I;
}

open R,"> $rpkm_out";
open C,"> $count_out";
my $head=join "\t",@sample_id;
print R "\t$head\n";
print C "\t$head\n";
foreach my $id(sort keys %rpkm){
my @rpkm=($id);
my @count=($id);
foreach my $sample_id(@sample_id){
my ($rpkm,$count)=(0,0);
if(exists $rpkm{$id}{$sample_id}{rpkm}){
$rpkm = $rpkm{$id}{$sample_id}{rpkm};
}
if(exists $rpkm{$id}{$sample_id}{count}){
$count = $rpkm{$id}{$sample_id}{count};
}
push @rpkm,$rpkm;
push @count,$count;
}
my $rpkm_line=join "\t",@rpkm;
my $count_line=join "\t",@count;
print R "$rpkm_line\n";
print C "$count_line\n";
}
close R;
close C;
19 changes: 19 additions & 0 deletions 02.count_tpm/scripts/sam2reads_count.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#! /usr/bin/env perl
use strict;
use warnings;

my %hash;
my $control=0;
while(<>){
chomp;
my @a=split(/\s+/);
my $flag=$a[1];
next if($flag>256);
next if($flag & 4); my $id=$a[2];
$hash{$id}++;
# last if($control++>1000);
}

foreach my $id(sort {$hash{$b} <=> $hash{$a}} keys %hash){
print "$id\t$hash{$id}\n";
}
Loading

0 comments on commit 2039d15

Please sign in to comment.