From 12bcc9a68f0a442429d1f6de14cad7a0cd50616a Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 20 Nov 2023 12:02:19 -0500 Subject: [PATCH 1/6] Add subworkflow to infer strandedness for samples where information is missing. --- conf/modules.config | 12 +++ modules.json | 5 ++ modules/local/extractstrand/main.nf | 63 ++++++++++++++++ modules/nf-core/seqtk/sample/environment.yml | 7 ++ modules/nf-core/seqtk/sample/main.nf | 45 +++++++++++ modules/nf-core/seqtk/sample/meta.yml | 46 ++++++++++++ subworkflows/local/infer_strand.nf | 78 ++++++++++++++++++++ subworkflows/local/input_check.nf | 4 +- subworkflows/local/preprocess_reads.nf | 2 +- workflows/forte.nf | 10 ++- 10 files changed, 268 insertions(+), 4 deletions(-) create mode 100755 modules/local/extractstrand/main.nf create mode 100644 modules/nf-core/seqtk/sample/environment.yml create mode 100644 modules/nf-core/seqtk/sample/main.nf create mode 100644 modules/nf-core/seqtk/sample/meta.yml create mode 100755 subworkflows/local/infer_strand.nf diff --git a/conf/modules.config b/conf/modules.config index 07afea1..578c2a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -485,4 +485,16 @@ process { ] } + withName: '.*:INFER_STRAND:STAR_ALIGN' { + publishDir = [ + enabled:false + ] + } + + withName: '.*:INFER_STRAND:SEQTK_SAMPLE' { + publishDir = [ + enabled: false + ] + } + } diff --git a/modules.json b/modules.json index fa4c97f..4350889 100644 --- a/modules.json +++ b/modules.json @@ -131,6 +131,11 @@ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, + "seqtk/sample": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "star/align": { "branch": "master", "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914", diff --git a/modules/local/extractstrand/main.nf b/modules/local/extractstrand/main.nf new file mode 100755 index 0000000..3de930f --- /dev/null +++ b/modules/local/extractstrand/main.nf @@ -0,0 +1,63 @@ +process EXTRACTSTRAND { + tag "$meta.id" + label 'process_single' + + conda "biocontainers::pandas:1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : + 'biocontainers/pandas:1.5.2' }" + + input: + tuple val(meta), path(metrics) + + output: + tuple val(meta), path("*.determination.txt"), emit: strand + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + #!/usr/local/bin/python + + import pandas as pd + + df = pd.read_csv("${metrics}", skiprows=(lambda x: x not in [6, 7]), sep="\\t") + r1_transcript_strand_reads = int(df.iloc[0]['NUM_R1_TRANSCRIPT_STRAND_READS']) + r2_transcript_strand_reads = int(df.iloc[0]['NUM_R2_TRANSCRIPT_STRAND_READS']) + + if r1_transcript_strand_reads/3 > r2_transcript_strand_reads: + determination = "yes" + elif r2_transcript_strand_reads/3 > r1_transcript_strand_reads: + determination = "reverse" + else: + determination = "no" + + strandedness_correct = True + if "${meta.strandedness}" != "auto": + if determination == "${meta.strandedness}": + strandedness_correct = False + + with open("${prefix}.determination.txt",'w') as f: + f.write("${meta.id}\\t${meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n") + + with open("versions.yml", 'w') as f: + f.write("${task.process}:") + f.write(" pandas:1.5.2") + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + #!/usr/bin/python + + with open("${prefix}.determination.txt", 'w') as f: + pass + + with open("versions.yml", 'w') as f: + f.write("${task.process}:") + f.write(" pandas:1.5.2") + """ +} diff --git a/modules/nf-core/seqtk/sample/environment.yml b/modules/nf-core/seqtk/sample/environment.yml new file mode 100644 index 0000000..2344cd2 --- /dev/null +++ b/modules/nf-core/seqtk/sample/environment.yml @@ -0,0 +1,7 @@ +name: seqtk_sample +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::seqtk=1.3 diff --git a/modules/nf-core/seqtk/sample/main.nf b/modules/nf-core/seqtk/sample/main.nf new file mode 100644 index 0000000..230b054 --- /dev/null +++ b/modules/nf-core/seqtk/sample/main.nf @@ -0,0 +1,45 @@ +process SEQTK_SAMPLE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : + 'biocontainers/seqtk:1.3--h5bf99c6_3' }" + + input: + tuple val(meta), path(reads), val(sample_size) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (!(args ==~ /.*-s[0-9]+.*/)) { + args += " -s100" + } + if ( !sample_size ) { + error "SEQTK/SAMPLE must have a sample_size value included" + } + """ + printf "%s\\n" $reads | while read f; + do + seqtk \\ + sample \\ + $args \\ + \$f \\ + $sample_size \\ + | gzip --no-name > ${prefix}_\$(basename \$f) + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqtk/sample/meta.yml b/modules/nf-core/seqtk/sample/meta.yml new file mode 100644 index 0000000..549aee9 --- /dev/null +++ b/modules/nf-core/seqtk/sample/meta.yml @@ -0,0 +1,46 @@ +name: seqtk_sample +description: Subsample reads from FASTQ files +keywords: + - sample +tools: + - seqtk: + description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. Seqtk sample command subsamples sequences. + homepage: https://github.com/lh3/seqtk + documentation: https://docs.csc.fi/apps/seqtk/ + tool_dev_url: https://github.com/lh3/seqtk + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input FastQ files + pattern: "*.{fastq.gz}" + - sample_size: + type: value + description: Number of reads to sample. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Subsampled FastQ files + pattern: "*.{fastq.gz}" +authors: + - "@kaurravneet4123" + - "@sidorov-si" + - "@adamrtalbot" +maintainers: + - "@kaurravneet4123" + - "@sidorov-si" + - "@adamrtalbot" diff --git a/subworkflows/local/infer_strand.nf b/subworkflows/local/infer_strand.nf new file mode 100755 index 0000000..03bef6a --- /dev/null +++ b/subworkflows/local/infer_strand.nf @@ -0,0 +1,78 @@ +include { SEQTK_SAMPLE } from '../../modules/nf-core/seqtk/sample/main' +include { PREPROCESS_READS } from './preprocess_reads' +include { PICARD_COLLECTRNASEQMETRICS } from '../../modules/nf-core/picard/collectrnaseqmetrics/main' +include { EXTRACTSTRAND } from '../../modules/local/extractstrand/main' +include { STAR_ALIGN } from '../../modules/nf-core/star/align/main' +include { GROUP_READS } from './group_reads' + +workflow INFER_STRAND { + + take: + reads + star_index + gtf + refflat + fasta + + main: + + ch_versions = Channel.empty() + + reads_branch = reads + .branch{meta, reads -> + auto: meta.strandedness == "auto" + other: true + } + + GROUP_READS(reads_branch.auto) + + SEQTK_SAMPLE( + GROUP_READS.out.grouped_reads + .map{ meta, reads -> + [ meta, meta.single_end ? [reads[0]] : [reads[0], reads[1]], 50000 ] + } + ) + ch_versions = ch_versions.mix(SEQTK_SAMPLE.out.versions.first()) + + PREPROCESS_READS(SEQTK_SAMPLE.out.reads) + ch_versions = ch_versions.mix(PREPROCESS_READS.out.ch_versions.first()) + + STAR_ALIGN( + PREPROCESS_READS.out.reads_untrimmed, + star_index, + gtf, + false, + [], + [] + ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + PICARD_COLLECTRNASEQMETRICS( + STAR_ALIGN.out.bam, + refflat, + fasta, + [] + ) + ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first()) + + EXTRACTSTRAND(PICARD_COLLECTRNASEQMETRICS.out.metrics) + + amended_reads = EXTRACTSTRAND.out.strand + .map{meta, strand_txt -> + [ meta["sample"], strand_txt ] + }.join( + reads.map{ meta, reads -> + [ meta["sample"], meta, reads ] + }, by:[0] + ).map{ sample, strand_txt, meta, reads -> + def new_meta = meta.clone() + new_meta["input_strandedness"] = new_meta["strandedness"] + new_meta["strandedness"] = strand_txt.text.split("\\t")[2] + [new_meta, reads] + }.mix( reads_branch.other ) + + emit: + reads = amended_reads + ch_versions = ch_versions + +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 90d1a3a..4e3318b 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -52,8 +52,8 @@ def create_fastq_channel(LinkedHashMap row) { try { meta.umi2 = meta.umi2.toInteger() * "N" } catch(Exception e) { } - meta.strandedness = row.strand ? (row.strand.trim() == "" ? "no" : row.strand.trim()) : "no" - if (! ["yes","no","reverse"].contains(meta.strandedness)){ + meta.strandedness = row.strand ? (row.strand.trim() == "" ? "auto" : row.strand.trim()) : "auto" + if (! ["yes","no","reverse","auto"].contains(meta.strandedness)){ exit 1, "ERROR: Please check input samplesheet -> strand value is invalid!\n${row.strand ? row.strand : ""}" } diff --git a/subworkflows/local/preprocess_reads.nf b/subworkflows/local/preprocess_reads.nf index 2ed1180..892edc3 100644 --- a/subworkflows/local/preprocess_reads.nf +++ b/subworkflows/local/preprocess_reads.nf @@ -21,7 +21,7 @@ workflow PREPROCESS_READS { .map{ meta, reads -> def meta_clone = meta.clone() if (params.extract_fq_read_group) { - def rg_map = Utils.flowcellLaneFromFastq(reads[0]) + def rg_map = Utils.flowcellLaneFromFastq([reads].flatten()[0]) meta_clone.read_group = "${meta.sample}@${rg_map["fcid"]}@${rg_map["lane"]}@${meta.fastq_pair_id}" meta_clone.id = meta_clone.read_group } else { diff --git a/workflows/forte.nf b/workflows/forte.nf index 44339eb..bc2acbf 100644 --- a/workflows/forte.nf +++ b/workflows/forte.nf @@ -51,6 +51,7 @@ include { BAIT_INPUTS } from '../subworkflows/local/baits' // include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { PREPARE_REFERENCES } from '../subworkflows/local/prepare_references' +include { INFER_STRAND } from '../subworkflows/local/infer_strand' include { PREPROCESS_READS } from '../subworkflows/local/preprocess_reads' include { ALIGN_READS } from '../subworkflows/local/align_reads' include { MULTIQC } from '../modules/nf-core/multiqc/main' @@ -92,9 +93,16 @@ workflow FORTE { PREPARE_REFERENCES() ch_versions = ch_versions.mix(PREPARE_REFERENCES.out.ch_versions) + INFER_STRAND( + INPUT_CHECK.out.reads, + PREPARE_REFERENCES.out.star_index, + PREPARE_REFERENCES.out.gtf, + PREPARE_REFERENCES.out.refflat, + params.fasta + ) PREPROCESS_READS( - INPUT_CHECK.out.reads + INFER_STRAND.out.reads ) ch_versions = ch_versions.mix(PREPROCESS_READS.out.ch_versions) From 9d62856b8c664d771c04965260ba93d14b66e118 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 20 Nov 2023 13:17:59 -0500 Subject: [PATCH 2/6] Added strandedness assessment to multiqc reports --- assets/multiqc_config.yml | 14 ++++++++++++++ conf/modules.config | 6 +++--- modules/local/extractstrand/main.nf | 22 +++++++++++----------- subworkflows/local/infer_strand.nf | 3 +-- subworkflows/local/input_check.nf | 1 + subworkflows/local/qc.nf | 8 ++++++++ 6 files changed, 38 insertions(+), 16 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ce46189..3d8a262 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -21,9 +21,23 @@ custom_data: title: "Gene Count" format: "{:,.0f}" description: "Kallisto: Number of genes with detected expression" + assess_strandedness: + plot_type: "generalstats" + pconfig: + - input_strandedness: + title: "Input Strandedness" + description: "Strandedness entered in input samplesheet" + - inferred_strandedness: + title: "Inferred Strandedness" + description: "Strandedness determined from PICARD_COLLECTRNASEQMETRICS results" + - input_strand_correct: + title: "Strandedness Correct" + description: "Correctness of input strandedness" sp: htseq_expression_genstats: fn: "*.htseq.summary.txt" kallisto_expression_genstats: fn: "*.kallisto.customsummary.txt" + assess_strandedness: + fn: "*.strandedness.txt" diff --git a/conf/modules.config b/conf/modules.config index 578c2a4..c0e4dc9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -485,15 +485,15 @@ process { ] } - withName: '.*:INFER_STRAND:STAR_ALIGN' { + withName: '.*:INFER_STRAND:.*' { publishDir = [ enabled:false ] } - withName: '.*:INFER_STRAND:SEQTK_SAMPLE' { + withName: EXTRACTSTRAND { publishDir = [ - enabled: false + enabled:false ] } diff --git a/modules/local/extractstrand/main.nf b/modules/local/extractstrand/main.nf index 3de930f..031c96d 100755 --- a/modules/local/extractstrand/main.nf +++ b/modules/local/extractstrand/main.nf @@ -11,7 +11,7 @@ process EXTRACTSTRAND { tuple val(meta), path(metrics) output: - tuple val(meta), path("*.determination.txt"), emit: strand + tuple val(meta), path("*.strandedness.txt"), emit: strand path "versions.yml" , emit: versions when: @@ -36,16 +36,16 @@ process EXTRACTSTRAND { determination = "no" strandedness_correct = True - if "${meta.strandedness}" != "auto": - if determination == "${meta.strandedness}": - strandedness_correct = False + if "${meta.strandedness}" == determination: + strandedness_correct = False - with open("${prefix}.determination.txt",'w') as f: - f.write("${meta.id}\\t${meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n") + with open("${prefix}.strandedness.txt",'w') as f: + f.write("\\tinput_strandedness\\tinferred_strandedness\\tinput_strand_correct\\n") + f.write("${meta.id}\\t${meta.auto_strandedness ? "auto" : meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n") with open("versions.yml", 'w') as f: - f.write("${task.process}:") - f.write(" pandas:1.5.2") + f.write("${task.process}:\\n") + f.write(" pandas: 1.5.2\\n") """ stub: @@ -53,11 +53,11 @@ process EXTRACTSTRAND { """ #!/usr/bin/python - with open("${prefix}.determination.txt", 'w') as f: + with open("${prefix}.strandedness.txt", 'w') as f: pass with open("versions.yml", 'w') as f: - f.write("${task.process}:") - f.write(" pandas:1.5.2") + f.write("${task.process}:\\n") + f.write(" pandas:1.5.2\\n") """ } diff --git a/subworkflows/local/infer_strand.nf b/subworkflows/local/infer_strand.nf index 03bef6a..5b808f4 100755 --- a/subworkflows/local/infer_strand.nf +++ b/subworkflows/local/infer_strand.nf @@ -20,7 +20,7 @@ workflow INFER_STRAND { reads_branch = reads .branch{meta, reads -> - auto: meta.strandedness == "auto" + auto: meta.auto_strandedness other: true } @@ -66,7 +66,6 @@ workflow INFER_STRAND { }, by:[0] ).map{ sample, strand_txt, meta, reads -> def new_meta = meta.clone() - new_meta["input_strandedness"] = new_meta["strandedness"] new_meta["strandedness"] = strand_txt.text.split("\\t")[2] [new_meta, reads] }.mix( reads_branch.other ) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 4e3318b..9831ea7 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -53,6 +53,7 @@ def create_fastq_channel(LinkedHashMap row) { meta.umi2 = meta.umi2.toInteger() * "N" } catch(Exception e) { } meta.strandedness = row.strand ? (row.strand.trim() == "" ? "auto" : row.strand.trim()) : "auto" + meta.auto_strandedness = meta.strandedness == "auto" ? true : false if (! ["yes","no","reverse","auto"].contains(meta.strandedness)){ exit 1, "ERROR: Please check input samplesheet -> strand value is invalid!\n${row.strand ? row.strand : ""}" } diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 4865128..97f42d2 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -7,6 +7,8 @@ include { MULTIQC as MULTIQC_COLLECT } from '../../modules/nf-core/multiqc/main' include { BAM_RSEQC } from '../nf-core/bam_rseqc/main' +include { EXTRACTSTRAND } from '../../modules/local/extractstrand/main' + workflow QC { @@ -40,6 +42,11 @@ workflow QC { ) ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first()) + EXTRACTSTRAND( + PICARD_COLLECTRNASEQMETRICS.out.metrics + ) + ch_versions = ch_versions.mix(EXTRACTSTRAND.out.versions.first()) + PICARD_COLLECTHSMETRICS( bam .filter{ meta, bam -> @@ -58,6 +65,7 @@ workflow QC { multiqc_files = multiqc_files .mix(PICARD_COLLECTRNASEQMETRICS.out.metrics) + .mix(EXTRACTSTRAND.out.strand) .mix(PICARD_COLLECTHSMETRICS.out.metrics) .mix(BAM_RSEQC.out.bamstat_txt) .mix(BAM_RSEQC.out.innerdistance_freq) From e4ab6365799078abf1ed14867d90adba73856d62 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 20 Nov 2023 19:10:14 -0500 Subject: [PATCH 3/6] fix reporting of strand correctness in multiqc report --- assets/analysis_multiqc_config.yml | 18 +++++++---------- modules/local/extractstrand/main.nf | 30 ++++++++++++----------------- subworkflows/local/infer_strand.nf | 3 ++- 3 files changed, 21 insertions(+), 30 deletions(-) diff --git a/assets/analysis_multiqc_config.yml b/assets/analysis_multiqc_config.yml index 3d8a262..0c31834 100644 --- a/assets/analysis_multiqc_config.yml +++ b/assets/analysis_multiqc_config.yml @@ -22,17 +22,13 @@ custom_data: format: "{:,.0f}" description: "Kallisto: Number of genes with detected expression" assess_strandedness: - plot_type: "generalstats" + file_format: 'tsv' + section_name: 'Strandedness' + description: "Pass/Fail status of sample strandedness based on the input file" + plot_type: 'table' pconfig: - - input_strandedness: - title: "Input Strandedness" - description: "Strandedness entered in input samplesheet" - - inferred_strandedness: - title: "Inferred Strandedness" - description: "Strandedness determined from PICARD_COLLECTRNASEQMETRICS results" - - input_strand_correct: - title: "Strandedness Correct" - description: "Correctness of input strandedness" + id: 'strandedness' + namespace: 'strandedness_table' sp: htseq_expression_genstats: @@ -40,4 +36,4 @@ sp: kallisto_expression_genstats: fn: "*.kallisto.customsummary.txt" assess_strandedness: - fn: "*.strandedness.txt" + fn: "*.strandedness.tsv" diff --git a/modules/local/extractstrand/main.nf b/modules/local/extractstrand/main.nf index 031c96d..b78f1f8 100755 --- a/modules/local/extractstrand/main.nf +++ b/modules/local/extractstrand/main.nf @@ -11,7 +11,7 @@ process EXTRACTSTRAND { tuple val(meta), path(metrics) output: - tuple val(meta), path("*.strandedness.txt"), emit: strand + tuple val(meta), path("*.strandedness.tsv"), emit: strand path "versions.yml" , emit: versions when: @@ -25,23 +25,17 @@ process EXTRACTSTRAND { import pandas as pd df = pd.read_csv("${metrics}", skiprows=(lambda x: x not in [6, 7]), sep="\\t") - r1_transcript_strand_reads = int(df.iloc[0]['NUM_R1_TRANSCRIPT_STRAND_READS']) - r2_transcript_strand_reads = int(df.iloc[0]['NUM_R2_TRANSCRIPT_STRAND_READS']) - - if r1_transcript_strand_reads/3 > r2_transcript_strand_reads: - determination = "yes" - elif r2_transcript_strand_reads/3 > r1_transcript_strand_reads: - determination = "reverse" - else: - determination = "no" - - strandedness_correct = True - if "${meta.strandedness}" == determination: - strandedness_correct = False - - with open("${prefix}.strandedness.txt",'w') as f: - f.write("\\tinput_strandedness\\tinferred_strandedness\\tinput_strand_correct\\n") - f.write("${meta.id}\\t${meta.auto_strandedness ? "auto" : meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n") + df = df.drop(columns = [col for col in list(df) if col not in ['NUM_R1_TRANSCRIPT_STRAND_READS','NUM_R2_TRANSCRIPT_STRAND_READS']]) + + df['input_strandedness'] = "${meta.auto_strandedness ? "auto" : meta.strandedness}" + df['inferred_strandedness'] = df.apply(lambda row: "yes" if row['NUM_R1_TRANSCRIPT_STRAND_READS']/3 >= row['NUM_R2_TRANSCRIPT_STRAND_READS'] else "reverse" if row['NUM_R2_TRANSCRIPT_STRAND_READS']/3 >= row['NUM_R1_TRANSCRIPT_STRAND_READS'] else "no", axis=1) + df['input_strand_correct'] = df.apply(lambda row: True if "${meta.strandedness}" == row["inferred_strandedness"] else False, axis=1) + df.index = ['${meta.id}'] + + desired_column_order = ['input_strandedness', 'inferred_strandedness', 'input_strand_correct','NUM_R1_TRANSCRIPT_STRAND_READS','NUM_R2_TRANSCRIPT_STRAND_READS'] + df = df[desired_column_order] + + df.to_csv("${prefix}.strandedness.tsv",sep="\\t", index=True) with open("versions.yml", 'w') as f: f.write("${task.process}:\\n") diff --git a/subworkflows/local/infer_strand.nf b/subworkflows/local/infer_strand.nf index 5b808f4..a5d8330 100755 --- a/subworkflows/local/infer_strand.nf +++ b/subworkflows/local/infer_strand.nf @@ -66,7 +66,8 @@ workflow INFER_STRAND { }, by:[0] ).map{ sample, strand_txt, meta, reads -> def new_meta = meta.clone() - new_meta["strandedness"] = strand_txt.text.split("\\t")[2] + new_meta["strandedness"] = strand_txt.readLines()[1].split("\\t")[2] + [new_meta, reads] }.mix( reads_branch.other ) From c2e21e0286fb8390a25dc2025ae7f07f1a3d6a14 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 20 Nov 2023 19:40:40 -0500 Subject: [PATCH 4/6] fix linting with prettier --- assets/analysis_multiqc_config.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/assets/analysis_multiqc_config.yml b/assets/analysis_multiqc_config.yml index 0c31834..9c91c9a 100644 --- a/assets/analysis_multiqc_config.yml +++ b/assets/analysis_multiqc_config.yml @@ -22,13 +22,13 @@ custom_data: format: "{:,.0f}" description: "Kallisto: Number of genes with detected expression" assess_strandedness: - file_format: 'tsv' - section_name: 'Strandedness' + file_format: "tsv" + section_name: "Strandedness" description: "Pass/Fail status of sample strandedness based on the input file" - plot_type: 'table' + plot_type: "table" pconfig: - id: 'strandedness' - namespace: 'strandedness_table' + id: "strandedness" + namespace: "strandedness_table" sp: htseq_expression_genstats: From 37da1dd1795abbcb03d1183cd3fdd480afad56e9 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Mon, 20 Nov 2023 19:41:10 -0500 Subject: [PATCH 5/6] fix docker registry issues --- modules/local/agfusion/batch/main.nf | 4 ++-- modules/local/agfusion/download/main.nf | 4 ++-- modules/local/metafusion/main.nf | 4 ++-- modules/local/oncokb/fusionannotator/main.nf | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/local/agfusion/batch/main.nf b/modules/local/agfusion/batch/main.nf index 95f3fa2..0544b5d 100644 --- a/modules/local/agfusion/batch/main.nf +++ b/modules/local/agfusion/batch/main.nf @@ -5,8 +5,8 @@ process AGFUSION_BATCH { // Note: 2.7X indices incompatible with AWS iGenomes. conda 'bioconda::agfusion=1.252' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'cmopipeline/agfusion:0.0.6' : - 'cmopipeline/agfusion:0.0.6' }" + 'docker://cmopipeline/agfusion:0.0.6' : + 'docker://cmopipeline/agfusion:0.0.6' }" input: tuple val(meta), path(fusions) diff --git a/modules/local/agfusion/download/main.nf b/modules/local/agfusion/download/main.nf index ad76dac..6647777 100644 --- a/modules/local/agfusion/download/main.nf +++ b/modules/local/agfusion/download/main.nf @@ -4,8 +4,8 @@ process AGFUSION_DOWNLOAD { // Note: 2.7X indices incompatible with AWS iGenomes. conda 'bioconda::agfusion=1.252' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'cmopipeline/agfusion:0.0.6' : - 'cmopipeline/agfusion:0.0.6' }" + 'docker://cmopipeline/agfusion:0.0.6' : + 'docker://cmopipeline/agfusion:0.0.6' }" input: val(ensembl_release) diff --git a/modules/local/metafusion/main.nf b/modules/local/metafusion/main.nf index e3e8120..40b90ee 100644 --- a/modules/local/metafusion/main.nf +++ b/modules/local/metafusion/main.nf @@ -3,8 +3,8 @@ process METAFUSION { label "process_low" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'cmopipeline/metafusion:0.0.6' : - 'cmopipeline/metafusion:0.0.6' }" + 'docker://cmopipeline/metafusion:0.0.6' : + 'docker://cmopipeline/metafusion:0.0.6' }" input: tuple val(meta), path(cff) diff --git a/modules/local/oncokb/fusionannotator/main.nf b/modules/local/oncokb/fusionannotator/main.nf index 1a263c5..bd311e7 100644 --- a/modules/local/oncokb/fusionannotator/main.nf +++ b/modules/local/oncokb/fusionannotator/main.nf @@ -5,8 +5,8 @@ process ONCOKB_FUSIONANNOTATOR { // Note: 2.7X indices incompatible with AWS iGenomes. //conda "shahcompbio::oncokb-annotator=2.3.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'cmopipeline/oncokb-annotator:0.0.1' : - 'cmopipeline/oncokb-annotator:0.0.1' }" + 'docker://cmopipeline/oncokb-annotator:0.0.1' : + 'docker://cmopipeline/oncokb-annotator:0.0.1' }" input: tuple val(meta), path(cff) From ac13c081f2b98fdf60d9135611faef71c54b810f Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Fri, 22 Dec 2023 20:52:27 -0500 Subject: [PATCH 6/6] enable module binaries --- conf/base.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/base.config b/conf/base.config index c799813..c9129d2 100644 --- a/conf/base.config +++ b/conf/base.config @@ -67,3 +67,5 @@ process { cache = false } } + +nextflow.enable.moduleBinaries = true