From 12bcc9a68f0a442429d1f6de14cad7a0cd50616a Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 20 Nov 2023 12:02:19 -0500
Subject: [PATCH 1/6] Add subworkflow to infer strandedness for samples where
 information is missing.

---
 conf/modules.config                          | 12 +++
 modules.json                                 |  5 ++
 modules/local/extractstrand/main.nf          | 63 ++++++++++++++++
 modules/nf-core/seqtk/sample/environment.yml |  7 ++
 modules/nf-core/seqtk/sample/main.nf         | 45 +++++++++++
 modules/nf-core/seqtk/sample/meta.yml        | 46 ++++++++++++
 subworkflows/local/infer_strand.nf           | 78 ++++++++++++++++++++
 subworkflows/local/input_check.nf            |  4 +-
 subworkflows/local/preprocess_reads.nf       |  2 +-
 workflows/forte.nf                           | 10 ++-
 10 files changed, 268 insertions(+), 4 deletions(-)
 create mode 100755 modules/local/extractstrand/main.nf
 create mode 100644 modules/nf-core/seqtk/sample/environment.yml
 create mode 100644 modules/nf-core/seqtk/sample/main.nf
 create mode 100644 modules/nf-core/seqtk/sample/meta.yml
 create mode 100755 subworkflows/local/infer_strand.nf

diff --git a/conf/modules.config b/conf/modules.config
index 07afea1..578c2a4 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -485,4 +485,16 @@ process {
         ]
     }
 
+    withName: '.*:INFER_STRAND:STAR_ALIGN' {
+        publishDir = [
+            enabled:false
+        ]
+    }
+
+    withName: '.*:INFER_STRAND:SEQTK_SAMPLE' {
+        publishDir = [
+            enabled: false
+        ]
+    }
+
 }
diff --git a/modules.json b/modules.json
index fa4c97f..4350889 100644
--- a/modules.json
+++ b/modules.json
@@ -131,6 +131,11 @@
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["modules"]
                     },
+                    "seqtk/sample": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
                     "star/align": {
                         "branch": "master",
                         "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914",
diff --git a/modules/local/extractstrand/main.nf b/modules/local/extractstrand/main.nf
new file mode 100755
index 0000000..3de930f
--- /dev/null
+++ b/modules/local/extractstrand/main.nf
@@ -0,0 +1,63 @@
+process EXTRACTSTRAND {
+    tag "$meta.id"
+    label 'process_single'
+    // Software environment: pandas 1.5.2 from conda-forge, or the equivalent biocontainers image.
+    conda "conda-forge::pandas=1.5.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+            'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
+            'biocontainers/pandas:1.5.2' }"
+
+    input:
+    tuple val(meta), path(metrics)
+
+    output:
+    tuple val(meta), path("*.determination.txt"), emit: strand
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env python
+    # Classify strandedness from the R1/R2 transcript-strand read counts in the metrics file.
+    import pandas as pd
+
+    df = pd.read_csv("${metrics}", skiprows=(lambda x: x not in [6, 7]), sep="\\t")
+    r1_transcript_strand_reads = int(df.iloc[0]['NUM_R1_TRANSCRIPT_STRAND_READS'])
+    r2_transcript_strand_reads = int(df.iloc[0]['NUM_R2_TRANSCRIPT_STRAND_READS'])
+
+    if r1_transcript_strand_reads/3 > r2_transcript_strand_reads:
+        determination = "yes"
+    elif r2_transcript_strand_reads/3 > r1_transcript_strand_reads:
+        determination = "reverse"
+    else:
+        determination = "no"
+
+    strandedness_correct = True
+    if "${meta.strandedness}" != "auto":
+        if determination == "${meta.strandedness}":
+            strandedness_correct = False
+
+    with open("${prefix}.determination.txt",'w') as f:
+        f.write("${meta.id}\\t${meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n")
+
+    with open("versions.yml", 'w') as f:
+        f.write("${task.process}:")
+        f.write("    pandas:1.5.2")
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/python
+
+    with open("${prefix}.determination.txt", 'w') as f:
+        pass
+
+    with open("versions.yml", 'w') as f:
+        f.write("${task.process}:")
+        f.write("    pandas:1.5.2")
+    """
+}
diff --git a/modules/nf-core/seqtk/sample/environment.yml b/modules/nf-core/seqtk/sample/environment.yml
new file mode 100644
index 0000000..2344cd2
--- /dev/null
+++ b/modules/nf-core/seqtk/sample/environment.yml
@@ -0,0 +1,7 @@
+name: seqtk_sample
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::seqtk=1.3
diff --git a/modules/nf-core/seqtk/sample/main.nf b/modules/nf-core/seqtk/sample/main.nf
new file mode 100644
index 0000000..230b054
--- /dev/null
+++ b/modules/nf-core/seqtk/sample/main.nf
@@ -0,0 +1,45 @@
+process SEQTK_SAMPLE {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' :
+        'biocontainers/seqtk:1.3--h5bf99c6_3' }"
+
+    input:
+    tuple val(meta), path(reads), val(sample_size)
+
+    output:
+    tuple val(meta), path("*.fastq.gz"), emit: reads
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (!(args ==~ /.*-s[0-9]+.*/)) {
+        args += " -s100"
+    }
+    if ( !sample_size ) {
+        error "SEQTK/SAMPLE must have a sample_size value included"
+    }
+    """
+    printf "%s\\n" $reads | while read f;
+    do
+        seqtk \\
+            sample \\
+            $args \\
+            \$f \\
+            $sample_size \\
+            | gzip --no-name > ${prefix}_\$(basename \$f)
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/seqtk/sample/meta.yml b/modules/nf-core/seqtk/sample/meta.yml
new file mode 100644
index 0000000..549aee9
--- /dev/null
+++ b/modules/nf-core/seqtk/sample/meta.yml
@@ -0,0 +1,46 @@
+name: seqtk_sample
+description: Subsample reads from FASTQ files
+keywords:
+  - sample
+tools:
+  - seqtk:
+      description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. Seqtk sample command subsamples sequences.
+      homepage: https://github.com/lh3/seqtk
+      documentation: https://docs.csc.fi/apps/seqtk/
+      tool_dev_url: https://github.com/lh3/seqtk
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: List of input FastQ files
+      pattern: "*.{fastq.gz}"
+  - sample_size:
+      type: value
+      description: Number of reads to sample.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Subsampled FastQ files
+      pattern: "*.{fastq.gz}"
+authors:
+  - "@kaurravneet4123"
+  - "@sidorov-si"
+  - "@adamrtalbot"
+maintainers:
+  - "@kaurravneet4123"
+  - "@sidorov-si"
+  - "@adamrtalbot"
diff --git a/subworkflows/local/infer_strand.nf b/subworkflows/local/infer_strand.nf
new file mode 100755
index 0000000..03bef6a
--- /dev/null
+++ b/subworkflows/local/infer_strand.nf
@@ -0,0 +1,78 @@
+include { SEQTK_SAMPLE                } from '../../modules/nf-core/seqtk/sample/main'
+include { PREPROCESS_READS            } from './preprocess_reads'
+include { PICARD_COLLECTRNASEQMETRICS } from '../../modules/nf-core/picard/collectrnaseqmetrics/main'
+include { EXTRACTSTRAND               } from '../../modules/local/extractstrand/main'
+include { STAR_ALIGN                  } from '../../modules/nf-core/star/align/main'
+include { GROUP_READS                 } from './group_reads'
+
+workflow INFER_STRAND {
+
+    take:
+    reads
+    star_index
+    gtf
+    refflat
+    fasta
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    reads_branch = reads
+        .branch{meta, reads ->
+            auto: meta.strandedness == "auto"
+            other: true
+        }
+
+    GROUP_READS(reads_branch.auto)
+
+    SEQTK_SAMPLE(
+        GROUP_READS.out.grouped_reads
+            .map{ meta, reads ->
+                [ meta, meta.single_end ? [reads[0]] : [reads[0], reads[1]], 50000 ]
+            }
+    )
+    ch_versions = ch_versions.mix(SEQTK_SAMPLE.out.versions.first())
+
+    PREPROCESS_READS(SEQTK_SAMPLE.out.reads)
+    ch_versions = ch_versions.mix(PREPROCESS_READS.out.ch_versions.first())
+
+    STAR_ALIGN(
+        PREPROCESS_READS.out.reads_untrimmed,
+        star_index,
+        gtf,
+        false,
+        [],
+        []
+    )
+    ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first())
+
+    PICARD_COLLECTRNASEQMETRICS(
+        STAR_ALIGN.out.bam,
+        refflat,
+        fasta,
+        []
+    )
+    ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first())
+
+    EXTRACTSTRAND(PICARD_COLLECTRNASEQMETRICS.out.metrics)
+    ch_versions = ch_versions.mix(EXTRACTSTRAND.out.versions.first())
+    amended_reads = EXTRACTSTRAND.out.strand
+        .map{meta, strand_txt ->
+            [ meta["sample"], strand_txt ]
+        }.combine( // combine, not join: pair the strand call with every reads entry sharing the sample key
+            reads.map{ meta, reads ->
+                [ meta["sample"], meta, reads ]
+            }, by:[0]
+        ).map{ sample, strand_txt, meta, reads ->
+            def new_meta = meta.clone()
+            new_meta["input_strandedness"] = new_meta["strandedness"]
+            new_meta["strandedness"] = strand_txt.text.split("\\t")[2]
+            [new_meta, reads]
+        }.mix( reads_branch.other )
+
+    emit:
+    reads       = amended_reads
+    ch_versions = ch_versions
+
+}
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 90d1a3a..4e3318b 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -52,8 +52,8 @@ def create_fastq_channel(LinkedHashMap row) {
     try {
         meta.umi2 = meta.umi2.toInteger() * "N"
     } catch(Exception e) { }
-    meta.strandedness = row.strand ? (row.strand.trim() == "" ? "no" : row.strand.trim()) : "no"
-    if (! ["yes","no","reverse"].contains(meta.strandedness)){
+    meta.strandedness = row.strand ? (row.strand.trim() == "" ? "auto" : row.strand.trim()) : "auto"
+    if (! ["yes","no","reverse","auto"].contains(meta.strandedness)){
         exit 1, "ERROR: Please check input samplesheet -> strand value is invalid!\n${row.strand ? row.strand : ""}"
     }
 
diff --git a/subworkflows/local/preprocess_reads.nf b/subworkflows/local/preprocess_reads.nf
index 2ed1180..892edc3 100644
--- a/subworkflows/local/preprocess_reads.nf
+++ b/subworkflows/local/preprocess_reads.nf
@@ -21,7 +21,7 @@ workflow PREPROCESS_READS {
         .map{ meta, reads ->
             def meta_clone = meta.clone()
             if (params.extract_fq_read_group) {
-                def rg_map = Utils.flowcellLaneFromFastq(reads[0])
+                def rg_map = Utils.flowcellLaneFromFastq([reads].flatten()[0])
                 meta_clone.read_group = "${meta.sample}@${rg_map["fcid"]}@${rg_map["lane"]}@${meta.fastq_pair_id}"
                 meta_clone.id = meta_clone.read_group
             } else {
diff --git a/workflows/forte.nf b/workflows/forte.nf
index 44339eb..bc2acbf 100644
--- a/workflows/forte.nf
+++ b/workflows/forte.nf
@@ -51,6 +51,7 @@ include { BAIT_INPUTS     } from '../subworkflows/local/baits'
 //
 include { CUSTOM_DUMPSOFTWAREVERSIONS       } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { PREPARE_REFERENCES                } from '../subworkflows/local/prepare_references'
+include { INFER_STRAND                      } from '../subworkflows/local/infer_strand'
 include { PREPROCESS_READS                  } from '../subworkflows/local/preprocess_reads'
 include { ALIGN_READS                       } from '../subworkflows/local/align_reads'
 include { MULTIQC                           } from '../modules/nf-core/multiqc/main'
@@ -92,9 +93,16 @@ workflow FORTE {
     PREPARE_REFERENCES()
     ch_versions = ch_versions.mix(PREPARE_REFERENCES.out.ch_versions)
 
+    INFER_STRAND(
+        INPUT_CHECK.out.reads,
+        PREPARE_REFERENCES.out.star_index,
+        PREPARE_REFERENCES.out.gtf,
+        PREPARE_REFERENCES.out.refflat,
+        params.fasta
+    )
 
     PREPROCESS_READS(
-        INPUT_CHECK.out.reads
+        INFER_STRAND.out.reads
     )
     ch_versions = ch_versions.mix(PREPROCESS_READS.out.ch_versions)
 

From 9d62856b8c664d771c04965260ba93d14b66e118 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 20 Nov 2023 13:17:59 -0500
Subject: [PATCH 2/6] Added strandedness assessment to multiqc reports

---
 assets/multiqc_config.yml           | 14 ++++++++++++++
 conf/modules.config                 |  6 +++---
 modules/local/extractstrand/main.nf | 22 +++++++++++-----------
 subworkflows/local/infer_strand.nf  |  3 +--
 subworkflows/local/input_check.nf   |  1 +
 subworkflows/local/qc.nf            |  8 ++++++++
 6 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index ce46189..3d8a262 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -21,9 +21,23 @@ custom_data:
           title: "Gene Count"
           format: "{:,.0f}"
           description: "Kallisto: Number of genes with detected expression"
+  assess_strandedness:
+    plot_type: "generalstats"
+    pconfig:
+      - input_strandedness:
+          title: "Input Strandedness"
+          description: "Strandedness entered in input samplesheet"
+      - inferred_strandedness:
+          title: "Inferred Strandedness"
+          description: "Strandedness determined from PICARD_COLLECTRNASEQMETRICS results"
+      - input_strand_correct:
+          title: "Strandedness Correct"
+          description: "Correctness of input strandedness"
 
 sp:
   htseq_expression_genstats:
     fn: "*.htseq.summary.txt"
   kallisto_expression_genstats:
     fn: "*.kallisto.customsummary.txt"
+  assess_strandedness:
+    fn: "*.strandedness.txt"
diff --git a/conf/modules.config b/conf/modules.config
index 578c2a4..c0e4dc9 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -485,15 +485,15 @@ process {
         ]
     }
 
-    withName: '.*:INFER_STRAND:STAR_ALIGN' {
+    withName: '.*:INFER_STRAND:.*' {
         publishDir = [
             enabled:false
         ]
     }
 
-    withName: '.*:INFER_STRAND:SEQTK_SAMPLE' {
+    withName: EXTRACTSTRAND {
         publishDir = [
-            enabled: false
+            enabled:false
         ]
     }
 
diff --git a/modules/local/extractstrand/main.nf b/modules/local/extractstrand/main.nf
index 3de930f..031c96d 100755
--- a/modules/local/extractstrand/main.nf
+++ b/modules/local/extractstrand/main.nf
@@ -11,7 +11,7 @@ process EXTRACTSTRAND {
     tuple val(meta), path(metrics)
 
     output:
-    tuple val(meta), path("*.determination.txt"), emit: strand
+    tuple val(meta), path("*.strandedness.txt"), emit: strand
     path "versions.yml"                       , emit: versions
 
     when:
@@ -36,16 +36,16 @@ process EXTRACTSTRAND {
         determination = "no"
 
     strandedness_correct = True
-    if "${meta.strandedness}" != "auto":
-        if determination == "${meta.strandedness}":
-            strandedness_correct = False
+    if "${meta.strandedness}" == determination:
+        strandedness_correct = False
 
-    with open("${prefix}.determination.txt",'w') as f:
-        f.write("${meta.id}\\t${meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n")
+    with open("${prefix}.strandedness.txt",'w') as f:
+        f.write("\\tinput_strandedness\\tinferred_strandedness\\tinput_strand_correct\\n")
+        f.write("${meta.id}\\t${meta.auto_strandedness ? "auto" : meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n")
 
     with open("versions.yml", 'w') as f:
-        f.write("${task.process}:")
-        f.write("    pandas:1.5.2")
+        f.write("${task.process}:\\n")
+        f.write("    pandas: 1.5.2\\n")
     """
 
     stub:
@@ -53,11 +53,11 @@ process EXTRACTSTRAND {
     """
     #!/usr/bin/python
 
-    with open("${prefix}.determination.txt", 'w') as f:
+    with open("${prefix}.strandedness.txt", 'w') as f:
         pass
 
     with open("versions.yml", 'w') as f:
-        f.write("${task.process}:")
-        f.write("    pandas:1.5.2")
+        f.write("${task.process}:\\n")
+        f.write("    pandas: 1.5.2\\n")
     """
 }
diff --git a/subworkflows/local/infer_strand.nf b/subworkflows/local/infer_strand.nf
index 03bef6a..5b808f4 100755
--- a/subworkflows/local/infer_strand.nf
+++ b/subworkflows/local/infer_strand.nf
@@ -20,7 +20,7 @@ workflow INFER_STRAND {
 
     reads_branch = reads
         .branch{meta, reads ->
-            auto: meta.strandedness == "auto"
+            auto: meta.auto_strandedness
             other: true
         }
 
@@ -66,7 +66,6 @@ workflow INFER_STRAND {
             }, by:[0]
         ).map{ sample, strand_txt, meta, reads ->
             def new_meta = meta.clone()
-            new_meta["input_strandedness"] = new_meta["strandedness"]
             new_meta["strandedness"] = strand_txt.text.split("\\t")[2]
             [new_meta, reads]
         }.mix( reads_branch.other )
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 4e3318b..9831ea7 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -53,6 +53,7 @@ def create_fastq_channel(LinkedHashMap row) {
         meta.umi2 = meta.umi2.toInteger() * "N"
     } catch(Exception e) { }
     meta.strandedness = row.strand ? (row.strand.trim() == "" ? "auto" : row.strand.trim()) : "auto"
+    meta.auto_strandedness = meta.strandedness == "auto"
     if (! ["yes","no","reverse","auto"].contains(meta.strandedness)){
         exit 1, "ERROR: Please check input samplesheet -> strand value is invalid!\n${row.strand ? row.strand : ""}"
     }
diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf
index 4865128..97f42d2 100644
--- a/subworkflows/local/qc.nf
+++ b/subworkflows/local/qc.nf
@@ -7,6 +7,8 @@ include {
     MULTIQC as MULTIQC_COLLECT
 } from '../../modules/nf-core/multiqc/main'
 include { BAM_RSEQC                   } from '../nf-core/bam_rseqc/main'
+include { EXTRACTSTRAND               } from '../../modules/local/extractstrand/main'
+
 
 workflow QC {
 
@@ -40,6 +42,11 @@ workflow QC {
     )
     ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first())
 
+    EXTRACTSTRAND(
+        PICARD_COLLECTRNASEQMETRICS.out.metrics
+    )
+    ch_versions = ch_versions.mix(EXTRACTSTRAND.out.versions.first())
+
     PICARD_COLLECTHSMETRICS(
         bam
             .filter{ meta, bam ->
@@ -58,6 +65,7 @@ workflow QC {
 
     multiqc_files = multiqc_files
         .mix(PICARD_COLLECTRNASEQMETRICS.out.metrics)
+        .mix(EXTRACTSTRAND.out.strand)
         .mix(PICARD_COLLECTHSMETRICS.out.metrics)
         .mix(BAM_RSEQC.out.bamstat_txt)
         .mix(BAM_RSEQC.out.innerdistance_freq)

From e4ab6365799078abf1ed14867d90adba73856d62 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 20 Nov 2023 19:10:14 -0500
Subject: [PATCH 3/6] fix reporting of strand correctness in multiqc report

---
 assets/analysis_multiqc_config.yml  | 18 +++++++----------
 modules/local/extractstrand/main.nf | 30 ++++++++++++-----------------
 subworkflows/local/infer_strand.nf  |  3 ++-
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/assets/analysis_multiqc_config.yml b/assets/analysis_multiqc_config.yml
index 3d8a262..0c31834 100644
--- a/assets/analysis_multiqc_config.yml
+++ b/assets/analysis_multiqc_config.yml
@@ -22,17 +22,13 @@ custom_data:
           format: "{:,.0f}"
           description: "Kallisto: Number of genes with detected expression"
   assess_strandedness:
-    plot_type: "generalstats"
+    file_format: 'tsv'
+    section_name: 'Strandedness'
+    description: "Pass/Fail status of sample strandedness based on the input file"
+    plot_type: 'table'
     pconfig:
-      - input_strandedness:
-          title: "Input Strandedness"
-          description: "Strandedness entered in input samplesheet"
-      - inferred_strandedness:
-          title: "Inferred Strandedness"
-          description: "Strandedness determined from PICARD_COLLECTRNASEQMETRICS results"
-      - input_strand_correct:
-          title: "Strandedness Correct"
-          description: "Correctness of input strandedness"
+      id: 'strandedness'
+      namespace: 'strandedness_table'
 
 sp:
   htseq_expression_genstats:
@@ -40,4 +36,4 @@ sp:
   kallisto_expression_genstats:
     fn: "*.kallisto.customsummary.txt"
   assess_strandedness:
-    fn: "*.strandedness.txt"
+    fn: "*.strandedness.tsv"
diff --git a/modules/local/extractstrand/main.nf b/modules/local/extractstrand/main.nf
index 031c96d..b78f1f8 100755
--- a/modules/local/extractstrand/main.nf
+++ b/modules/local/extractstrand/main.nf
@@ -11,7 +11,7 @@ process EXTRACTSTRAND {
     tuple val(meta), path(metrics)
 
     output:
-    tuple val(meta), path("*.strandedness.txt"), emit: strand
+    tuple val(meta), path("*.strandedness.tsv"), emit: strand
     path "versions.yml"                       , emit: versions
 
     when:
@@ -25,23 +25,17 @@ process EXTRACTSTRAND {
     import pandas as pd
 
     df = pd.read_csv("${metrics}", skiprows=(lambda x: x not in [6, 7]), sep="\\t")
-    r1_transcript_strand_reads = int(df.iloc[0]['NUM_R1_TRANSCRIPT_STRAND_READS'])
-    r2_transcript_strand_reads = int(df.iloc[0]['NUM_R2_TRANSCRIPT_STRAND_READS'])
-
-    if r1_transcript_strand_reads/3 > r2_transcript_strand_reads:
-        determination = "yes"
-    elif r2_transcript_strand_reads/3 > r1_transcript_strand_reads:
-        determination = "reverse"
-    else:
-        determination = "no"
-
-    strandedness_correct = True
-    if "${meta.strandedness}" == determination:
-        strandedness_correct = False
-
-    with open("${prefix}.strandedness.txt",'w') as f:
-        f.write("\\tinput_strandedness\\tinferred_strandedness\\tinput_strand_correct\\n")
-        f.write("${meta.id}\\t${meta.auto_strandedness ? "auto" : meta.strandedness}\\t" + determination + "\\t" + str(strandedness_correct) + "\\n")
+    df = df.drop(columns = [col for col in list(df) if col not in ['NUM_R1_TRANSCRIPT_STRAND_READS','NUM_R2_TRANSCRIPT_STRAND_READS']])
+
+    df['input_strandedness']    = "${meta.auto_strandedness ? "auto" : meta.strandedness}"
+    df['inferred_strandedness'] = df.apply(lambda row: "yes" if row['NUM_R1_TRANSCRIPT_STRAND_READS']/3 > row['NUM_R2_TRANSCRIPT_STRAND_READS'] else "reverse" if row['NUM_R2_TRANSCRIPT_STRAND_READS']/3 > row['NUM_R1_TRANSCRIPT_STRAND_READS'] else "no", axis=1)
+    df['input_strand_correct']  = df.apply(lambda row: True if "${meta.strandedness}" == row["inferred_strandedness"] else False, axis=1)
+    df.index                    = ['${meta.id}']
+
+    desired_column_order = ['input_strandedness', 'inferred_strandedness', 'input_strand_correct','NUM_R1_TRANSCRIPT_STRAND_READS','NUM_R2_TRANSCRIPT_STRAND_READS']
+    df = df[desired_column_order]
+
+    df.to_csv("${prefix}.strandedness.tsv",sep="\\t", index=True)
 
     with open("versions.yml", 'w') as f:
         f.write("${task.process}:\\n")
diff --git a/subworkflows/local/infer_strand.nf b/subworkflows/local/infer_strand.nf
index 5b808f4..a5d8330 100755
--- a/subworkflows/local/infer_strand.nf
+++ b/subworkflows/local/infer_strand.nf
@@ -66,7 +66,8 @@ workflow INFER_STRAND {
             }, by:[0]
         ).map{ sample, strand_txt, meta, reads ->
             def new_meta = meta.clone()
-            new_meta["strandedness"] = strand_txt.text.split("\\t")[2]
+            new_meta["strandedness"] = strand_txt.readLines()[1].split("\\t")[2]
+
             [new_meta, reads]
         }.mix( reads_branch.other )
 

From c2e21e0286fb8390a25dc2025ae7f07f1a3d6a14 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 20 Nov 2023 19:40:40 -0500
Subject: [PATCH 4/6] fix linting with prettier

---
 assets/analysis_multiqc_config.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/assets/analysis_multiqc_config.yml b/assets/analysis_multiqc_config.yml
index 0c31834..9c91c9a 100644
--- a/assets/analysis_multiqc_config.yml
+++ b/assets/analysis_multiqc_config.yml
@@ -22,13 +22,13 @@ custom_data:
           format: "{:,.0f}"
           description: "Kallisto: Number of genes with detected expression"
   assess_strandedness:
-    file_format: 'tsv'
-    section_name: 'Strandedness'
+    file_format: "tsv"
+    section_name: "Strandedness"
     description: "Pass/Fail status of sample strandedness based on the input file"
-    plot_type: 'table'
+    plot_type: "table"
     pconfig:
-      id: 'strandedness'
-      namespace: 'strandedness_table'
+      id: "strandedness"
+      namespace: "strandedness_table"
 
 sp:
   htseq_expression_genstats:

From 37da1dd1795abbcb03d1183cd3fdd480afad56e9 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 20 Nov 2023 19:41:10 -0500
Subject: [PATCH 5/6] fix docker registry issues

---
 modules/local/agfusion/batch/main.nf         | 4 ++--
 modules/local/agfusion/download/main.nf      | 4 ++--
 modules/local/metafusion/main.nf             | 4 ++--
 modules/local/oncokb/fusionannotator/main.nf | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/modules/local/agfusion/batch/main.nf b/modules/local/agfusion/batch/main.nf
index 95f3fa2..0544b5d 100644
--- a/modules/local/agfusion/batch/main.nf
+++ b/modules/local/agfusion/batch/main.nf
@@ -5,8 +5,8 @@ process AGFUSION_BATCH {
     // Note: 2.7X indices incompatible with AWS iGenomes.
     conda 'bioconda::agfusion=1.252'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'cmopipeline/agfusion:0.0.6' :
-        'cmopipeline/agfusion:0.0.6' }"
+        'docker://cmopipeline/agfusion:0.0.6' :
+        'docker.io/cmopipeline/agfusion:0.0.6' }"
 
     input:
     tuple val(meta), path(fusions)
diff --git a/modules/local/agfusion/download/main.nf b/modules/local/agfusion/download/main.nf
index ad76dac..6647777 100644
--- a/modules/local/agfusion/download/main.nf
+++ b/modules/local/agfusion/download/main.nf
@@ -4,8 +4,8 @@ process AGFUSION_DOWNLOAD {
     // Note: 2.7X indices incompatible with AWS iGenomes.
     conda 'bioconda::agfusion=1.252'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'cmopipeline/agfusion:0.0.6' :
-        'cmopipeline/agfusion:0.0.6' }"
+        'docker://cmopipeline/agfusion:0.0.6' :
+        'docker.io/cmopipeline/agfusion:0.0.6' }"
 
     input:
     val(ensembl_release)
diff --git a/modules/local/metafusion/main.nf b/modules/local/metafusion/main.nf
index e3e8120..40b90ee 100644
--- a/modules/local/metafusion/main.nf
+++ b/modules/local/metafusion/main.nf
@@ -3,8 +3,8 @@ process METAFUSION {
     label "process_low"
 
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'cmopipeline/metafusion:0.0.6' :
-        'cmopipeline/metafusion:0.0.6' }"
+        'docker://cmopipeline/metafusion:0.0.6' :
+        'docker.io/cmopipeline/metafusion:0.0.6' }"
 
     input:
     tuple val(meta), path(cff)
diff --git a/modules/local/oncokb/fusionannotator/main.nf b/modules/local/oncokb/fusionannotator/main.nf
index 1a263c5..bd311e7 100644
--- a/modules/local/oncokb/fusionannotator/main.nf
+++ b/modules/local/oncokb/fusionannotator/main.nf
@@ -5,8 +5,8 @@ process ONCOKB_FUSIONANNOTATOR {
     // Note: 2.7X indices incompatible with AWS iGenomes.
     //conda "shahcompbio::oncokb-annotator=2.3.3"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'cmopipeline/oncokb-annotator:0.0.1' :
-        'cmopipeline/oncokb-annotator:0.0.1' }"
+        'docker://cmopipeline/oncokb-annotator:0.0.1' :
+        'docker.io/cmopipeline/oncokb-annotator:0.0.1' }"
 
     input:
     tuple val(meta), path(cff)

From ac13c081f2b98fdf60d9135611faef71c54b810f Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Fri, 22 Dec 2023 20:52:27 -0500
Subject: [PATCH 6/6] enable module binaries

---
 conf/base.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/base.config b/conf/base.config
index c799813..c9129d2 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -67,3 +67,5 @@ process {
         cache = false
     }
 }
+
+nextflow.enable.moduleBinaries = true