Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Arriba (and respective pVACfuse step) to the immuno workflow #115

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
52 changes: 47 additions & 5 deletions definitions/immuno.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct Rnaseq {
Array[File] stringtie_expression
Array[File] kallisto_expression
StarFusion star_fusion
Array[File] arriba
Array[File] fusioninspector_evidence
}

Expand Down Expand Up @@ -501,7 +502,7 @@ workflow immuno {
anchor_contribution_threshold=anchor_contribution_threshold
}

call pf.pvacfuse {
call pf.pvacfuse as agfusion_pvacfuse {
input:
input_fusions_zip=rna.annotated_fusion_predictions_zip,
star_fusion_file=rna.star_fusion_abridge,
Expand Down Expand Up @@ -532,6 +533,37 @@ workflow immuno {
problematic_amino_acids=problematic_amino_acids,
}

# Second invocation of the pvacfuse task, aliased `arriba_pvacfuse`, fed with
# the Arriba fusion predictions exposed by the `rna` call.
# NOTE(review): `input_fusions_zip` is bound to `rna.arriba_fusion_predict`,
# which the rnaseq workflow sets from `arriba.fusion_predictions` (a .tsv
# file, not a zip) -- confirm the pvacfuse task accepts a plain TSV here.
# NOTE(review): `star_fusion_file` still receives the STAR-Fusion abridged
# output even though the fusions come from Arriba -- confirm this is intended.
call pf.pvacfuse as arriba_pvacfuse {
input:
input_fusions_zip=rna.arriba_fusion_predict,
star_fusion_file=rna.star_fusion_abridge,
sample_name=tumor_sample_name,
alleles=hlaConsensus.consensus_alleles,
prediction_algorithms=prediction_algorithms,
epitope_lengths_class_i=epitope_lengths_class_i,
epitope_lengths_class_ii=epitope_lengths_class_ii,
binding_threshold=binding_threshold,
percentile_threshold=percentile_threshold,
iedb_retries=iedb_retries,
keep_tmp_files=pvacfuse_keep_tmp_files,
net_chop_method=net_chop_method,
netmhc_stab=netmhc_stab,
top_score_metric=top_score_metric,
net_chop_threshold=net_chop_threshold,
run_reference_proteome_similarity=run_reference_proteome_similarity,
peptide_fasta=peptide_fasta,
additional_report_columns=additional_report_columns,
fasta_size=fasta_size,
downstream_sequence_length=downstream_sequence_length,
exclude_nas=exclude_nas,
# thread count is taken from the pvacseq setting rather than a pvacfuse-specific one
n_threads=pvacseq_threads,
read_support=pvacfuse_read_support,
expn_val=pvacfuse_expn_val,
allele_specific_binding_thresholds=allele_specific_binding_thresholds,
aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
problematic_amino_acids=problematic_amino_acids,
}

call generate_fda_metrics.generateFdaMetrics {
input:
reference = reference,
Expand Down Expand Up @@ -606,6 +638,10 @@ workflow immuno {
],
candidates_preliminary: rna.prelim_starfusion_results
},
arriba: [
rna.arriba_fusion_predict,
rna.arriba_fusion_discard
],
fusioninspector_evidence: rna.fusioninspector_evidence
}

Expand Down Expand Up @@ -732,10 +768,16 @@ workflow immuno {
phase_vcf: [phaseVcf.phased_vcf, phaseVcf.phased_vcf_tbi]
}

MHC pVACfuse = object {
mhc_i: pvacfuse.mhc_i,
mhc_ii: pvacfuse.mhc_ii,
combined: pvacfuse.combined
MHC agfusion_pvacfuse_predictions = object {
mhc_i: agfusion_pvacfuse.mhc_i,
mhc_ii: agfusion_pvacfuse.mhc_ii,
combined: agfusion_pvacfuse.combined
}

MHC arriba_pvacfuse_predictions = object {
mhc_i: arriba_pvacfuse.mhc_i,
mhc_ii: arriba_pvacfuse.mhc_ii,
combined: arriba_pvacfuse.combined
}

File pvacseq_annotated_expression_vcf_gz = pvacseq.annotated_vcf
Expand Down
13 changes: 13 additions & 0 deletions definitions/rnaseq_star_fusion.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import "tools/kallisto.wdl" as k
import "tools/mark_duplicates_and_sort.wdl" as mdas
import "tools/samtools_sort.wdl" as ss
import "tools/star_fusion_detect.wdl" as sfd
import "tools/arriba.wdl" as ar
import "tools/strandedness_check.wdl" as sc
import "tools/stringtie.wdl" as s
import "tools/transcript_to_gene.wdl" as ttg
Expand Down Expand Up @@ -84,6 +85,16 @@ workflow rnaseqStarFusion {
min_ffpm_level=min_ffpm_level
}

# Run the Arriba task on the outputs of the starFusionDetect call: its aligned
# BAM and its `chim_junc` output.
# NOTE(review): `chim_junc` is declared in star_fusion_detect.wdl; the arriba
# task passes this file to `-c`, so it presumably must be STAR's chimeric SAM
# rather than the junction table -- confirm against that task's output path.
call ar.arriba {
input:
reference_annotation=reference_annotation,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
aligned_bam=starFusionDetect.aligned_bam,
star_fusion_chim_junc=starFusionDetect.chim_junc
}

call k.kallisto {
input:
kallisto_index=kallisto_index,
Expand Down Expand Up @@ -173,6 +184,8 @@ workflow rnaseqStarFusion {
File final_bam_bai = indexBam.indexed_bam_bai
File final_bai = indexBam.indexed_bai
File annotated_fusion_predictions_zip = agfusion.annotated_fusion_predictions_zip
File arriba_fusion_predict=arriba.fusion_predictions
File arriba_fusion_discard=arriba.discarded_fusion_predictions
File? star_fusion_coding_region_effects = starFusionDetect.coding_region_effects
Array[File] fusioninspector_evidence = starFusionDetect.fusioninspector_evidence
File flagstats = samtoolsFlagstat.flagstats
Expand Down
46 changes: 46 additions & 0 deletions definitions/tools/arriba.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
version 1.0

task arriba {
  # Runs Arriba v2.4.0 fusion detection on STAR alignments.
  #
  # Inputs:
  #   aligned_bam           - main alignments, passed to arriba as -x
  #   reference_annotation  - gene annotation, passed as -g
  #   reference             - genome assembly FASTA, passed as -a
  #   reference_fai/_dict   - not referenced in the command; they only
  #                           contribute to the disk-size estimate below
  #   star_fusion_chim_junc - chimeric alignments file, passed as -c
  #
  # Outputs:
  #   fusion_predictions           - arriba_fusions.tsv (-o)
  #   discarded_fusion_predictions - arriba_fusions.discarded.tsv (-O)
  input {
    File aligned_bam
    File reference_annotation
    File reference
    File reference_fai
    File reference_dict
    File star_fusion_chim_junc
  }

  # explicit typing required, don't inline
  # The chimeric alignments file is localized next to the BAM, so it is counted
  # in the disk estimate as well.
  Float bam_size_gb = size([aligned_bam, star_fusion_chim_junc, reference_annotation], "GB")
  Float reference_size_gb = size([reference, reference_fai, reference_dict], "GB")
  Int space_needed_gb = 10 + round(3*bam_size_gb + reference_size_gb)

  runtime {
    preemptible: 1
    maxRetries: 2
    memory: "64GB"
    docker: "uhrigs/arriba:2.4.0"
    disks: "local-disk ~{space_needed_gb} HDD"
  }

  # The blacklist, known-fusions and protein-domain databases are the
  # hg38/GRCh38 files at fixed paths inside the docker image.
  # The known-fusions file is given to both -k and -t; the Arriba docs
  # recommend this (-t supplies the tags) -- TODO confirm for v2.4.0.
  # Localized paths are quoted so unusual characters cannot split arguments.
  command <<<
    /arriba_v2.4.0/arriba \
      -b /arriba_v2.4.0/database/blacklist_hg38_GRCh38_v2.4.0.tsv.gz \
      -k /arriba_v2.4.0/database/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz \
      -t /arriba_v2.4.0/database/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz \
      -p /arriba_v2.4.0/database/protein_domains_hg38_GRCh38_v2.4.0.gff3 \
      -o arriba_fusions.tsv \
      -O arriba_fusions.discarded.tsv \
      -x "~{aligned_bam}" \
      -g "~{reference_annotation}" \
      -a "~{reference}" \
      -c "~{star_fusion_chim_junc}"
  >>>

  output {
    File fusion_predictions = "arriba_fusions.tsv"
    File discarded_fusion_predictions = "arriba_fusions.discarded.tsv"
  }
}

# Wrapper workflow whose only step is the arriba task; the call binds no
# inputs explicitly.
workflow wf {
  call arriba {
    input:
  }
}
2 changes: 1 addition & 1 deletion definitions/tools/pvacfuse.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ task pvacfuse {
runtime {
preemptible: 1
maxRetries: 2
docker: "susannakiwala/pvactools:4.0.0_rc_pvacview_v13"
docker: "susannakiwala/pvactools:4.0.0_rc_pvacview_v14"
memory: "16GB"
cpu: n_threads
disks: "local-disk ~{space_needed_gb} HDD"
Expand Down
2 changes: 1 addition & 1 deletion definitions/tools/pvacseq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ task pvacseq {
maxRetries: 2
memory: "16GB"
cpu: n_threads
docker: "susannakiwala/pvactools:4.0.0_rc_pvacview_v13"
docker: "susannakiwala/pvactools:4.0.0_rc_pvacview_v14"
disks: "local-disk ~{space_needed_gb} HDD"
}

Expand Down
5 changes: 3 additions & 2 deletions definitions/tools/star_fusion_detect.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ task starFusionDetect {
~{if defined(fusioninspector_mode) then "--FusionInspector " + fusioninspector_mode else ""} \
--STAR_outSAMattrRGline "~{sep=" , " outsam_attrrg_line}" \
--left_fq ~{sep="," fastq} --right_fq ~{sep="," fastq2} \
--min_FFPM ~{min_ffpm_level}
--min_FFPM ~{min_ffpm_level} \
--chimOutType SeparateSAMold
>>>

output {
Expand All @@ -66,7 +67,7 @@ task starFusionDetect {
File log = fusion_output_dir + "/Log.out"
File log_progress = fusion_output_dir + "/Log.progress.out"
File splice_junction_out = fusion_output_dir + "/SJ.out.tab"
File chim_junc = fusion_output_dir + "/Chimeric.out.junction"
File chim_junc = fusion_output_dir + "/Chimeric.out.sam"
# STAR also outputs gene counts file just like Kallisto
File gene_counts = fusion_output_dir + "/ReadsPerGene.out.tab"

Expand Down