Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

Commit

Permalink
DEV-618: Remove manta/BPI
Browse files Browse the repository at this point in the history
  • Loading branch information
kduyvesteyn committed Dec 11, 2018
1 parent 4698deb commit 3ae8eae
Show file tree
Hide file tree
Showing 16 changed files with 84 additions and 243 deletions.
6 changes: 3 additions & 3 deletions lib/HMF/Pipeline.pm
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use HMF::Pipeline::Amber;
use HMF::Pipeline::Cobalt;
use HMF::Pipeline::GermlineCalling;
use HMF::Pipeline::Strelka;
use HMF::Pipeline::StructuralVariants;
use HMF::Pipeline::Gridss;
use HMF::Pipeline::Purple;
use HMF::Pipeline::HealthCheck;
use HMF::Pipeline::PipelineCheck;
Expand All @@ -41,15 +41,15 @@ sub run {
if (($opt->{FASTQ} and $opt->{MAPPING} eq "yes") or $opt->{BAM}) {
# KODU: Always run post stats, even if we start from BAM.
HMF::Pipeline::PostStats::run($opt) if $opt->{POSTSTATS} eq "yes" and $opt->{BAM};
HMF::Pipeline::DamageEstimate::run($opt) if $opt->{DAMAGE_ESTIMATE} eq "yes";

HMF::Pipeline::Amber::run($opt) if $opt->{AMBER} eq "yes";
HMF::Pipeline::Cobalt::run($opt) if $opt->{COBALT} eq "yes";
HMF::Pipeline::DamageEstimate::run($opt) if $opt->{DAMAGE_ESTIMATE} eq "yes";

HMF::Pipeline::GermlineCalling::run($opt) if $opt->{GERMLINE_CALLING} eq "yes";

HMF::Pipeline::Strelka::run($opt) if $opt->{STRELKA} eq "yes";
HMF::Pipeline::StructuralVariants::run($opt) if $opt->{STRUCTURAL_VARIANT_CALLING} eq "yes";
HMF::Pipeline::Gridss::run($opt) if $opt->{GRIDSS} eq "yes";
HMF::Pipeline::Purple::run($opt) if $opt->{PURPLE} eq "yes";

HMF::Pipeline::HealthCheck::run($opt) if $opt->{HEALTHCHECK} eq "yes";
Expand Down
96 changes: 39 additions & 57 deletions lib/HMF/Pipeline/Functions/Validate.pm
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ sub configChecks {
PURPLE_TIME => \&key_not_present,
PURPLE_THREADS => \&key_not_present,
PURPLE_MEM => \&key_not_present,

PURPLE_REF_GENOME_VERSION => \&key_not_present,

CIRCOS_PATH => \&missing_directory,
GC_PROFILE => \&missing_file,
}
Expand Down Expand Up @@ -406,63 +409,42 @@ sub configChecks {
TABIX_PATH => \&missing_directory,
}
),
STRUCTURAL_VARIANT_CALLING => if_enabled({
MANTA => if_enabled({
MANTA_PATH => \&missing_directory,
MANTA_QUEUE => \&key_not_present,
MANTA_THREADS => \&key_not_present,
MANTA_MEM => \&key_not_present,
MANTA_TIME => \&key_not_present,

BPI_PATH => \&missing_directory,
BPI_QUEUE => \&key_not_present,
BPI_THREADS => \&key_not_present,
BPI_MEM => \&key_not_present,
BPI_TIME => \&key_not_present,

BPI_CONTAMINATION_FRACTION => \&key_not_present,

TABIX_PATH => \&missing_directory,
}
),
GRIDSS => if_enabled({
GRIDSS_PATH => \&missing_directory,
GRIDSS_BWA_PATH => \&missing_directory,

GRIDSS_PREPROCESS_QUEUE => \&key_not_present,
GRIDSS_PREPROCESS_THREADS => \&key_not_present,
GRIDSS_PREPROCESS_MEM => \&key_not_present,
GRIDSS_PREPROCESS_TIME => \&key_not_present,

GRIDSS_ASSEMBLE_QUEUE => \&key_not_present,
GRIDSS_ASSEMBLE_THREADS => \&key_not_present,
GRIDSS_ASSEMBLE_MEM => \&key_not_present,
GRIDSS_ASSEMBLE_TIME => \&key_not_present,

GRIDSS_ASSEMBLE_POST_PROCESS_QUEUE => \&key_not_present,
GRIDSS_ASSEMBLE_POST_PROCESS_THREADS => \&key_not_present,
GRIDSS_ASSEMBLE_POST_PROCESS_MEM => \&key_not_present,
GRIDSS_ASSEMBLE_POST_PROCESS_TIME => \&key_not_present,

GRIDSS_CALLING_QUEUE => \&key_not_present,
GRIDSS_CALLING_THREADS => \&key_not_present,
GRIDSS_CALLING_MEM => \&key_not_present,
GRIDSS_CALLING_TIME => \&key_not_present,

GRIDSS_ANNOTATE_QUEUE => \&key_not_present,
GRIDSS_ANNOTATE_THREADS => \&key_not_present,
GRIDSS_ANNOTATE_MEM => \&key_not_present,
GRIDSS_ANNOTATE_TIME => \&key_not_present,

GRIDSS_THRESHOLD_COVERAGE => \&key_not_present,
GRIDSS_BWA_BASES_PER_BATCH => \&key_not_present,
GRIDSS_BLACKLIST => \&missing_file,
GRIDSS_CONFIG => \&key_not_present,
GRIDSS_PON => \&missing_directory,

TABIX_PATH => \&missing_directory,
}
),
GRIDSS => if_enabled({
GRIDSS_PATH => \&missing_directory,
GRIDSS_BWA_PATH => \&missing_directory,

GRIDSS_PREPROCESS_QUEUE => \&key_not_present,
GRIDSS_PREPROCESS_THREADS => \&key_not_present,
GRIDSS_PREPROCESS_MEM => \&key_not_present,
GRIDSS_PREPROCESS_TIME => \&key_not_present,

GRIDSS_ASSEMBLE_QUEUE => \&key_not_present,
GRIDSS_ASSEMBLE_THREADS => \&key_not_present,
GRIDSS_ASSEMBLE_MEM => \&key_not_present,
GRIDSS_ASSEMBLE_TIME => \&key_not_present,

GRIDSS_ASSEMBLE_POST_PROCESS_QUEUE => \&key_not_present,
GRIDSS_ASSEMBLE_POST_PROCESS_THREADS => \&key_not_present,
GRIDSS_ASSEMBLE_POST_PROCESS_MEM => \&key_not_present,
GRIDSS_ASSEMBLE_POST_PROCESS_TIME => \&key_not_present,

GRIDSS_CALLING_QUEUE => \&key_not_present,
GRIDSS_CALLING_THREADS => \&key_not_present,
GRIDSS_CALLING_MEM => \&key_not_present,
GRIDSS_CALLING_TIME => \&key_not_present,

GRIDSS_ANNOTATE_QUEUE => \&key_not_present,
GRIDSS_ANNOTATE_THREADS => \&key_not_present,
GRIDSS_ANNOTATE_MEM => \&key_not_present,
GRIDSS_ANNOTATE_TIME => \&key_not_present,

GRIDSS_THRESHOLD_COVERAGE => \&key_not_present,
GRIDSS_BWA_BASES_PER_BATCH => \&key_not_present,
GRIDSS_BLACKLIST => \&missing_file,
GRIDSS_CONFIG => \&key_not_present,
GRIDSS_PON => \&missing_directory,

TABIX_PATH => \&missing_directory,
}
),
HEALTHCHECK => if_enabled({
Expand Down
126 changes: 30 additions & 96 deletions lib/HMF/Pipeline/StructuralVariants.pm → lib/HMF/Pipeline/Gridss.pm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package HMF::Pipeline::StructuralVariants;
package HMF::Pipeline::Gridss;

use FindBin::libs;
use discipline;
Expand All @@ -20,36 +20,30 @@ sub run {

say "\n### SCHEDULING STRUCTURAL VARIANT CALLING ###";

$opt->{RUNNING_JOBS}->{sv} = [];
if ($opt->{MANTA} eq "yes") {
my $manta_jobs = runManta($opt);
push @{$opt->{RUNNING_JOBS}->{sv}}, @{$manta_jobs};
}
$opt->{RUNNING_JOBS}->{gridss} = [];

if ($opt->{POSTSTATS} eq "no") {
say "\n[ERROR] Cannot schedule gridss without scheduling post stats!";
} else {
# KODU: We need the insert size metrics when running gridss. Their naming comes out of poststats and is dependent on the mode we run in.
my ($ref_sample, $tumor_sample, undef, undef, undef, undef) = sampleControlBamsAndJobs($opt);
my $suffix = "_MultipleMetrics.txt.insert_size_metrics";
my $ref_sample_name;
my $tumor_sample_name;
if ($opt->{BAM}) {
$ref_sample_name = $ref_sample;
$tumor_sample_name = $tumor_sample;
} elsif ($opt->{FASTQ}) {
$ref_sample_name = join "", $ref_sample, "_dedup";
$tumor_sample_name = join "", $tumor_sample, "_dedup";
}

$opt->{REF_INSERT_SIZE_METRICS} = catfile($opt->{OUTPUT_DIR}, "QCStats", $ref_sample_name, join "", $ref_sample_name, $suffix);
$opt->{TUMOR_INSERT_SIZE_METRICS} = catfile($opt->{OUTPUT_DIR}, "QCStats", $tumor_sample_name, join "", $tumor_sample_name, $suffix);

if ($opt->{GRIDSS} eq "yes") {
if ($opt->{POSTSTATS} eq "no") {
say "\n[WARN] Cannot schedule gridss without scheduling post stats!";
} else {
# KODU: We need the insert size metrics when running gridss. Their naming comes out of poststats and is dependent on the mode we run in.
my ($ref_sample, $tumor_sample, undef, undef, undef, undef) = sampleControlBamsAndJobs($opt);
my $suffix = "_MultipleMetrics.txt.insert_size_metrics";
my $ref_sample_name;
my $tumor_sample_name;
if ($opt->{BAM}) {
$ref_sample_name = $ref_sample;
$tumor_sample_name = $tumor_sample;
} elsif ($opt->{FASTQ}) {
$ref_sample_name = join "", $ref_sample, "_dedup";
$tumor_sample_name = join "", $tumor_sample, "_dedup";
}

$opt->{REF_INSERT_SIZE_METRICS} = catfile($opt->{OUTPUT_DIR}, "QCStats", $ref_sample_name, join "", $ref_sample_name, $suffix);
$opt->{TUMOR_INSERT_SIZE_METRICS} = catfile($opt->{OUTPUT_DIR}, "QCStats", $tumor_sample_name, join "", $tumor_sample_name, $suffix);

my $gridss_jobs = runGridss($opt);
if ($gridss_jobs) {
push @{$opt->{RUNNING_JOBS}->{sv}}, @{$gridss_jobs};
}
my $gridss_jobs = runGridss($opt);
if ($gridss_jobs) {
push @{$opt->{RUNNING_JOBS}->{gridss}}, @{$gridss_jobs};
}
}

Expand Down Expand Up @@ -256,6 +250,8 @@ sub runGridssCleanup {
sub runGridssFilter {
my ($dirs, $tumor_sample, $joint_name, $dependent_jobs, $opt) = @_;

my $final_vcf = catfile($dirs->{out}, join "", ${tumor_sample}, ".gridss.somatic.vcf.gz");

# KODU: Run with GRIDSS annotate settings, filter takes little resources.
my $job_id = fromTemplate(
"GridssFilter",
Expand All @@ -267,76 +263,14 @@ sub runGridssFilter {
$opt,
tumor_sample => $tumor_sample,
joint_name => $joint_name,
final_vcf => ${final_vcf},
);

return ($job_id);
}

# Schedules the Manta job and then the BreakpointInspector job for the
# sample pair described by $opt.
#
# The Manta job is scheduled against the jobs returned by
# sampleControlBamsAndJobs(); the BreakpointInspector job is scheduled
# against the Manta job.
#
# Returns an array reference holding both job ids, Manta first.
sub runManta {
    my ($opt) = @_;

    say "\n### SCHEDULING MANTA ###";

    my ($ref_sample, $tumor_sample, $ref_sample_bam, $tumor_sample_bam, $joint_name, $running_jobs) = sampleControlBamsAndJobs($opt);

    # Manta goes first; its job id seeds the list that BPI depends on.
    my $manta_job_id = runMantaJob($tumor_sample_bam, $ref_sample_bam, $joint_name, $running_jobs, $opt);
    my @scheduled = ($manta_job_id);

    # BPI receives a reference to the list of jobs scheduled so far.
    my $bpi_job_id = runBreakpointInspector($tumor_sample, $tumor_sample_bam, $ref_sample, $ref_sample_bam, $joint_name, \@scheduled, $opt);
    push @scheduled, $bpi_job_id;

    return \@scheduled;
}

# Schedules a single job from the "Manta" template.
#
# Parameters:
#   $tumor_sample_bam, $ref_sample_bam - passed through to the template
#   $joint_name   - used as the last path component of the manta output
#                   directory and passed through to the template
#   $running_jobs - passed to fromTemplate() as the jobs to depend on
#   $opt          - pipeline options; OUTPUT_DIR is read here
#
# Returns whatever fromTemplate() returns (used by callers as the job id).
sub runMantaJob {
    my ($tumor_sample_bam, $ref_sample_bam, $joint_name, $running_jobs, $opt) = @_;

    # <OUTPUT_DIR>/structuralVariants/manta/<joint_name>
    my @manta_dir_parts = ($opt->{OUTPUT_DIR}, "structuralVariants", "manta", $joint_name);
    my $dirs = createDirs(catfile(@manta_dir_parts));

    # Named values handed to the template, kept in their original order.
    my @template_params = (
        ref_sample_bam => $ref_sample_bam,
        tumor_sample_bam => $tumor_sample_bam,
        joint_name => $joint_name,
    );

    my $manta_job_id = fromTemplate(
        "Manta",
        undef,
        1,
        qsubTemplate($opt, "MANTA"),
        $running_jobs,
        $dirs,
        $opt,
        @template_params,
    );

    return $manta_job_id;
}

sub runBreakpointInspector {
my ($tumor_sample, $tumor_sample_bam, $ref_sample, $ref_sample_bam, $joint_name, $dependent_job_ids, $opt) = @_;

my $manta_vcf = catfile($opt->{OUTPUT_DIR}, "structuralVariants", "manta", $joint_name, "results", "variants", "somaticSV.vcf.gz");

my $dirs = createDirs(catfile($opt->{OUTPUT_DIR}, "structuralVariants", "bpi", $joint_name));
$opt->{STRUCTURAL_VARIANT_VCF} = catfile($dirs->{out}, "${joint_name}_somaticSV_bpi.vcf");

my $job_id = fromTemplate(
"BreakpointInspector",
undef,
1,
qsubTemplate($opt, "BPI"),
$dependent_job_ids,
$dirs,
$opt,
ref_sample => $ref_sample,
tumor_sample => $tumor_sample,
ref_sample_bam => $ref_sample_bam,
tumor_sample_bam => $tumor_sample_bam,
joint_name => $joint_name,
input_vcf => $manta_vcf,
);

$opt->{STRUCTURAL_VARIANT_VCF} = join "", $opt->{STRUCTURAL_VARIANT_VCF}, ".gz";
$opt->{STRUCTURAL_VARIANT_VCF} = $final_vcf;
$opt->{GRIDSS_VCF} = $final_vcf;
linkVcfArtefacts($opt->{STRUCTURAL_VARIANT_VCF}, 'structural_variant', $opt);

return $job_id;
return ($job_id);
}

1;
2 changes: 1 addition & 1 deletion lib/HMF/Pipeline/Purple.pm
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ sub dependencies {
my @jobs;
push @jobs, @{$opt->{RUNNING_JOBS}->{amber}};
push @jobs, @{$opt->{RUNNING_JOBS}->{cobalt}};
push @jobs, @{$opt->{RUNNING_JOBS}->{sv}};
push @jobs, @{$opt->{RUNNING_JOBS}->{gridss}};
push @jobs, @{$opt->{RUNNING_JOBS}->{strelka}};
return \@jobs;
}
Expand Down
3 changes: 1 addition & 2 deletions settings/CheckCoverage.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ STRELKA no
AMBER no
COBALT no
PURPLE no

STRUCTURAL_VARIANT_CALLING no
GRIDSS no

HEALTHCHECK no
6 changes: 1 addition & 5 deletions settings/PurpleOnly.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ STRELKA yes
AMBER yes
COBALT yes
PURPLE yes

STRUCTURAL_VARIANT_CALLING yes
MANTA yes

GRIDSS no
GRIDSS yes

HEALTHCHECK no
3 changes: 1 addition & 2 deletions settings/SingleSample.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ STRELKA no
AMBER no
COBALT no
PURPLE no

STRUCTURAL_VARIANT_CALLING no
GRIDSS no

HEALTHCHECK yes
3 changes: 0 additions & 3 deletions settings/Somatic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ STRELKA yes
AMBER yes
COBALT yes
PURPLE yes

STRUCTURAL_VARIANT_CALLING yes
MANTA yes
GRIDSS yes

HEALTHCHECK yes
10 changes: 0 additions & 10 deletions settings/include/cluster.ini
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,6 @@ COBALT_TIME 256:0:0
COBALT_THREADS 6
COBALT_MEM 10

MANTA_QUEUE all.q
MANTA_TIME 256:0:0
MANTA_THREADS 8
MANTA_MEM 10

BPI_QUEUE all.q
BPI_TIME 256:0:0
BPI_THREADS 1
BPI_MEM 4

GRIDSS_PREPROCESS_QUEUE all.q
GRIDSS_PREPROCESS_TIME 256:0:0
GRIDSS_PREPROCESS_THREADS 2
Expand Down
2 changes: 1 addition & 1 deletion settings/include/settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ANNOTATE_SNPEFF_FLAGS -hgvs -lof -no-downstream -ud 1000 -no-intergenic -noShift
ANNOTATE_DBNSFP_FIELDS hg38_chr,hg38_pos,genename,Uniprot_acc,Uniprot_id,Uniprot_aapos,Interpro_domain,cds_strand,refcodon,SLR_test_statistic,codonpos,fold-degenerate,Ancestral_allele,Ensembl_geneid,Ensembl_transcriptid,aapos,aapos_SIFT,aapos_FATHMM,SIFT_score,SIFT_converted_rankscore,SIFT_pred,Polyphen2_HDIV_score,Polyphen2_HDIV_rankscore,Polyphen2_HDIV_pred,Polyphen2_HVAR_score,Polyphen2_HVAR_rankscore,Polyphen2_HVAR_pred,LRT_score,LRT_converted_rankscore,LRT_pred,MutationTaster_score,MutationTaster_converted_rankscore,MutationTaster_pred,MutationAssessor_score,MutationAssessor_rankscore,MutationAssessor_pred,FATHMM_score,FATHMM_rankscore,FATHMM_pred,MetaSVM_score,MetaSVM_rankscore,MetaSVM_pred,MetaLR_score,MetaLR_rankscore,MetaLR_pred,Reliability_index,VEST3_score,VEST3_rankscore,PROVEAN_score,PROVEAN_converted_rankscore,PROVEAN_pred,CADD_raw,CADD_raw_rankscore,CADD_phred,GERP++_NR,GERP++_RS,GERP++_RS_rankscore,phyloP46way_primate,phyloP46way_primate_rankscore,phyloP46way_placental,phyloP46way_placental_rankscore,phyloP100way_vertebrate,phyloP100way_vertebrate_rankscore,phastCons46way_primate,phastCons46way_primate_rankscore,phastCons46way_placental,phastCons46way_placental_rankscore,phastCons100way_vertebrate,phastCons100way_vertebrate_rankscore,SiPhy_29way_pi,SiPhy_29way_logOdds,SiPhy_29way_logOdds_rankscore,LRT_Omega,UniSNP_ids,1000Gp1_AC,1000Gp1_AF,1000Gp1_AFR_AC,1000Gp1_AFR_AF,1000Gp1_EUR_AC,1000Gp1_EUR_AF,1000Gp1_AMR_AC,1000Gp1_AMR_AF,1000Gp1_ASN_AC,1000Gp1_ASN_AF,ESP6500_AA_AF,ESP6500_EA_AF,ARIC5606_AA_AC,ARIC5606_AA_AF,ARIC5606_EA_AC,ARIC5606_EA_AF,ExAC_AC,ExAC_AF,ExAC_Adj_AC,ExAC_Adj_AF,ExAC_AFR_AC,ExAC_AFR_AF,ExAC_AMR_AC,ExAC_AMR_AF,ExAC_EAS_AC,ExAC_EAS_AF,ExAC_FIN_AC,ExAC_FIN_AF,ExAC_NFE_AC,ExAC_NFE_AF,ExAC_SAS_AC,ExAC_SAS_AF,clinvar_rs,clinvar_clnsig,clinvar_trait,COSMIC_ID,COSMIC_CNT
ANNOTATE_FREQ_INFO AF,AN,AC

BPI_CONTAMINATION_FRACTION 0.03
PURPLE_REF_GENOME_VERSION hg19

GRIDSS_THRESHOLD_COVERAGE 50000
GRIDSS_BWA_BASES_PER_BATCH 40000000
2 changes: 0 additions & 2 deletions settings/include/tools.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ COBALT_PATH /data/tools/cobalt_v1.4
PURPLE_PATH /data/tools/purple_v2.17
CIRCOS_PATH /data/tools/circos_v0.69.6/bin

MANTA_PATH /data/tools/manta_v1.0.3/bin
BPI_PATH /data/tools/break-point-inspector_v1.7
GRIDSS_PATH /data/tools/gridss_v2.0.1
GRIDSS_BWA_PATH /data/tools/bwa_v0.7.17

Expand Down
Loading

0 comments on commit 3ae8eae

Please sign in to comment.