Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --db-index, --db-file, --taxonomy, --db-pipe for piped db files #89

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 86 additions & 60 deletions scripts/kraken
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ my $min_hits = 1;
my $fasta_input = 0;
my $fastq_input = 0;
my $db_prefix;
my $idx_file;
my $kdb_file;
my $db_size;
my $idx_size;
my $db_pipe = 0;
my $taxonomy;
my $threads;
my $preload = 0;
my $gunzip = 0;
Expand All @@ -55,7 +61,7 @@ my $check_names = 0;
my $only_classified_output = 0;
my $unclassified_out;
my $classified_out;
my $outfile;
my @outfiles;

GetOptions(
"help" => \&display_help,
Expand All @@ -64,11 +70,17 @@ GetOptions(
"threads=i" => \$threads,
"fasta-input" => \$fasta_input,
"fastq-input" => \$fastq_input,
"db-index=s" => \$idx_file,
"db-file=s" => \$kdb_file,
"db-size=i" => \$db_size,
"index-size=i" => \$idx_size,
"db-pipe" => \$db_pipe,
"taxonomy=s" => \$taxonomy,
"quick" => \$quick,
"min-hits=i" => \$min_hits,
"unclassified-out=s" => \$unclassified_out,
"classified-out=s" => \$classified_out,
"output=s" => \$outfile,
"output=s" => \@outfiles,
"preload" => \$preload,
"paired" => \$paired,
"check-names" => \$check_names,
Expand All @@ -85,31 +97,41 @@ if (! @ARGV) {
print STDERR "Need to specify input filenames!\n";
usage();
}
eval { $db_prefix = krakenlib::find_db($db_prefix); };
if ($@) {
die "$PROG: $@";

if (!defined $kdb_file || !defined $idx_file || !defined $taxonomy) {
eval { $db_prefix = krakenlib::find_db($db_prefix); };
if ($@) {
die "$PROG: $@";
}
}

my $taxonomy = "$db_prefix/taxonomy/nodes.dmp";
if (!defined $taxonomy) {
$taxonomy = "$db_prefix/taxonomy/nodes.dmp";
}
if ($quick) {
undef $taxonomy; # Skip loading nodes file, not needed in quick mode
undef $taxonomy; # Skip loading nodes file, not needed in quick mode
}

my $kdb_file = "$db_prefix/database.kdb";
my $idx_file = "$db_prefix/database.idx";
if (! -e $kdb_file) {
die "$PROG: $kdb_file does not exist!\n";
if (!defined $kdb_file) {
$kdb_file = "$db_prefix/database.kdb";
if (! -e $kdb_file) {
die "$PROG: $kdb_file does not exist!\n";
}
}
if (! -e $idx_file) {
die "$PROG: $idx_file does not exist!\n";

if (!defined $idx_file) {
$idx_file = "$db_prefix/database.idx";
if (! -e $idx_file) {
die "$PROG: $idx_file does not exist!\n";
}
}

if ($min_hits > 1 && ! $quick) {
die "$PROG: --min_hits requires --quick to be specified\n";
}

if ($paired && @ARGV != 2) {
die "$PROG: --paired requires exactly two filenames\n";
if ($paired && scalar @ARGV % 2 ) {
die "$PROG: --paired requires 2x filenames\n";
}

my $compressed = $gunzip || $bunzip2;
Expand All @@ -135,65 +157,64 @@ if ($auto_detect) {
my @flags;
push @flags, "-d", $kdb_file;
push @flags, "-i", $idx_file;
push @flags, "-D", $db_size if defined $db_size;
push @flags, "-I", $idx_size if defined $idx_size;
push @flags, "-t", $threads if $threads > 1;
push @flags, "-n", $taxonomy if defined $taxonomy;
push @flags, "-q" if $quick;
push @flags, "-p" if $db_pipe;
push @flags, "-m", $min_hits if $min_hits > 1;
push @flags, "-f" if $fastq_input && ! $paired; # merger always outputs FASTA
push @flags, "-U", $unclassified_out if defined $unclassified_out;
push @flags, "-C", $classified_out if defined $classified_out;
push @flags, "-o", $outfile if defined $outfile;
if (@outfiles) {
foreach my $outfile (@outfiles) {
push @flags, "-o", $outfile;
}
}
push @flags, "-c", if $only_classified_output;
push @flags, "-M" if $preload;

# handle piping for decompression/merging
my @pipe_argv;
if ($paired) {
my @merge_flags;
push @merge_flags, "--fa" if $fasta_input;
push @merge_flags, "--fq" if $fastq_input;
push @merge_flags, "--gz" if $gunzip;
push @merge_flags, "--bz2" if $bunzip2;
push @merge_flags, "--check-names" if $check_names;
@pipe_argv = ("read_merger.pl", @merge_flags, @ARGV);
}
elsif ($compressed) {
if ($gunzip) {
@pipe_argv = ("gzip", "-dc", @ARGV);
}
elsif ($bunzip2) {
@pipe_argv = ("bzip2", "-dc", @ARGV);
}
else {
die "$PROG: unrecognized compression program! This is a Kraken bug.\n";
}
}
my @pipe_argvv;
foreach (my $i = 0; $i < @ARGV; $i++) {
my @pipe_argv;
my $use_pipe = 0;
if ($paired) {
$use_pipe = 1;

my @merge_flags;
push @merge_flags, "--fa" if $fasta_input;
push @merge_flags, "--fq" if $fastq_input;
push @merge_flags, "--gz" if $gunzip;
push @merge_flags, "--bz2" if $bunzip2;
push @merge_flags, "--check-names" if $check_names;
@pipe_argv = ("read_merger.pl", @merge_flags, @ARGV[$i, $i+1]);
$i++;
}
elsif ($compressed) {
$use_pipe = 1;

if ($gunzip) {
@pipe_argv = ("gzip", "-dc", $ARGV[$i]);
}
elsif ($bunzip2) {
@pipe_argv = ("bzip2", "-dc", $ARGV[$i]);
}
else {
die "$PROG: unrecognized compression program! This is a Kraken bug.\n";
}
} else {
@pipe_argv = $ARGV[$i];
}
if ($use_pipe) {
@pipe_argv = ( "<(", @pipe_argv, ")" );
}
push @pipe_argvv, @pipe_argv;

# if args exist, set up the pipe/fork/exec
if (@pipe_argv) {
pipe RD, WR;
my $pid = fork();
if ($pid < 0) {
die "$PROG: fork error: $!\n";
}
if ($pid) {
open STDIN, "<&RD"
or die "$PROG: can't dup stdin to read end of pipe: $!\n";
close RD;
close WR;
@ARGV = ("/dev/fd/0"); # make classifier read from pipe
}
else {
open STDOUT, ">&WR"
or die "$PROG: can't dup stdout to write end of pipe: $!\n";
close RD;
close WR;
exec @pipe_argv
or die "$PROG: can't exec $pipe_argv[0]: $!\n";
}
}

exec $CLASSIFY, @flags, @ARGV;
exec join(" ", ("exec /bin/bash -c \"exec", $CLASSIFY, @flags, @pipe_argvv, "\""));
die "$PROG: exec error: $!\n";

sub usage {
Expand All @@ -212,6 +233,11 @@ Options:
--fastq-input Input is FASTQ format
--gzip-compressed Input is gzip compressed
--bzip2-compressed Input is bzip2 compressed
--db-index Path for db index file
--db-file Path for db file
--db-size Size of db file
--index-size Size of db index file
--db-pipe DB is piped
--quick Quick operation (use first hit or hits)
--min-hits NUM In quick op., number of hits req'd for classification
NOTE: this is ignored if --quick is not specified
Expand Down
6 changes: 3 additions & 3 deletions scripts/read_merger.pl
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@
for my $file (@ARGV) {
if (! -e $file) {
die "$PROG: $file does not exist\n";
}
if (! -f $file) {
die "$PROG: $file is not a regular file\n";
}
if (! (-f $file || -p $file)) {
die "$PROG: $file is not a file or pipe\n";
}
}

Expand Down
Loading