Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

index bam before calling samtools idxstats; warn user if input lacks reads #102

Merged
merged 6 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ class QCError(RuntimeError):
'''Indicates a failure at a QC step.'''

def __init__(self, reason):
super(QCError, self).__init__(reason)
super(QCError, self).__init__(reason)

class InvalidBamHeaderError(ValueError):
'''Indicates a malformed or otherwise unusable bam file header'''
pass
7 changes: 7 additions & 0 deletions read_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,9 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None,
ref_indexed = util.file.mkstempfname('.reference.fasta')
shutil.copyfile(refFasta, ref_indexed)

if samtools.isEmpty(inBam):
log.warning("The input bam file appears to have zero reads: %s", inBam)

mm2.align_bam(inBam, ref_indexed, bam_aligned)

if filterReadsAfterAlignment:
Expand All @@ -1401,6 +1404,10 @@ def minimap2_idxstats(inBam, refFasta, outBam=None, outStats=None,
shutil.move(bam_aligned, bam_filtered)

if outStats is not None:
# index the final bam before calling idxstats
# but only if it is a bam or cram file (sam cannot be indexed)
if (bam_filtered.endswith(".bam") or bam_filtered.endswith(".cram")):
samtools.index(bam_filtered)
samtools.idxstats(bam_filtered, outStats)

if outBam is None:
Expand Down
1 change: 1 addition & 0 deletions tools/bwa.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import tools.picard
import util.file
import util.misc
from errors import *

TOOL_NAME = 'bwa'

Expand Down
7 changes: 6 additions & 1 deletion tools/minimap2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import tools.picard
import util.file
import util.misc
from errors import *

TOOL_NAME = 'minimap2'

Expand Down Expand Up @@ -80,6 +81,7 @@ def align_bam(self, inBam, refDb, outBam, options=None,
log.warning("No alignment output for RG %s in file %s against %s", rg, inBam, refDb)

if len(align_bams)==0:
log.warning("All read groups in file %s appear to be empty.", inBam)
with util.file.tempfname('.empty.sam') as empty_sam:
samtools.dumpHeader(inBam, empty_sam)
samtools.sort(empty_sam, outBam)
Expand All @@ -92,6 +94,7 @@ def align_bam(self, inBam, refDb, outBam, options=None,
picardOptions=picardOptions,
JVMmemory=JVMmemory
)

for bam in align_bams:
os.unlink(bam)

Expand Down Expand Up @@ -181,8 +184,10 @@ def align_one_rg(self, inBam, refDb, outBam, rgid=None, preset=None, options=Non

# perform actual alignment
if samtools.isEmpty(one_rg_inBam):
log.warning("Input file %s appears to lack reads for RG '%s'", inBam, rgid)
# minimap doesn't like empty inputs, so copy empty bam through
samtools.sort(one_rg_inBam, outBam)
# samtools.sort(one_rg_inBam, outBam)
self.align_cmd(one_rg_inBam, refDb, outBam, options=options, threads=threads)
else:
self.align_cmd(one_rg_inBam, refDb, outBam, options=options, threads=threads)

Expand Down
19 changes: 7 additions & 12 deletions tools/novoalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@
either in $PATH or $NOVOALIGN_PATH.
'''

import tools
import tools.picard
import tools.samtools
import util.file
import util.misc

import logging
import os
import os.path
Expand All @@ -21,11 +15,16 @@
import stat
import sys

import tools
import tools.picard
import tools.samtools
import util.file
import util.misc
from errors import *

_log = logging.getLogger(__name__)

TOOL_NAME = "novoalign"
#TOOL_VERSION = "3.09.04"


class NovoalignTool(tools.Tool):

Expand Down Expand Up @@ -217,7 +216,3 @@ def index_fasta(self, refFasta, k=None, s=None):
os.chmod(outfname, mode)
except (IOError, OSError):
_log.warning('could not chmod "%s", this is likely OK', refFasta)


class InvalidBamHeaderError(ValueError):
pass
Loading