Skip to content

Commit 2a0140e

Browse files
authored
Minor update (#31)
* helper script to index at start of module3..could be used for all modules * increment version for cmo_index, etc * initial commit of cmo_index * logging * cmo_split_reads now opens and emits gzips..slow bc native python gzip instead of subprocess zcat * add logger function * use BufferedReader for speed improvement on gzip * increment version * version increment AGAIN * install cmo_fillout * longer delay for polling * speed improvement for split_reads...hopefully... * increment version for release * fake gz files for trimgalore * use args.version * use uuid for working dir * use multiprocessing * return return code in all circumstances * exit with command exit code * confused which object was namespace and which was dict
1 parent c4504a3 commit 2a0140e

File tree

6 files changed

+49
-30
lines changed

6 files changed

+49
-30
lines changed

bin/cmo_abra

+8-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import re, os
55
import subprocess
66

77
import time
8-
import cmo, shutil
8+
import cmo, shutil, uuid
99

1010
'''
1111
This method adds custom arguments to run the wrapper
@@ -148,6 +148,13 @@ def main():
148148
parser = process_options(parser, cmd_options)
149149
# Process the default variable to reflect true paths from json
150150
(args, command_specific_args) = convert_default_variables(parser)
151+
if not os.path.exists(args.working):
152+
try:
153+
os.makedirs(args.working)
154+
except:
155+
print >>sys.stderr, "Unable to create your directory tree for scratch."
156+
sys.exit(1)
157+
command_specific_args['working'] = os.path.join(args.working, str(uuid.uuid4()) + "/")
151158
# Delete keys that are created by wrapper before making the command
152159
command_specific_args = delete_wrapper_specific_args(command_specific_args)
153160
# Make the command that needs to be launched

bin/cmo_bwa_mem

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,6 @@ if __name__ =='__main__':
3737
for key in ["genome", "fastq1", "fastq2", "output", "sam", "version"]:
3838
del args_dict[key]
3939
parser.parse_args()
40-
cmo.util.call_cmd(bwa_helper.mem(fasta, fastq1, fastq2, output, args_dict=args_dict, no_bam=sam))
40+
sys.exit(cmo.util.call_cmd(bwa_helper.mem(fasta, fastq1, fastq2, output, args_dict=args_dict, no_bam=sam)))
4141

4242

bin/cmo_gatk

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,6 @@ if __name__ =='__main__':
104104
if not os.path.exists(bam.replace(".bam",".bai"))and not os.path.exists(bam +".bai"):
105105
cmo.util.samtools_index(bam)
106106
cmo.util.remove_logging_options_from_dict(command_specific_args)
107-
cmo.util.call_cmd(gatk_helper.gatk_cmd(options.cmd, command_specific_args=command_specific_args), stderr=stderr, stdout=stdout)
107+
sys.exit(cmo.util.call_cmd(gatk_helper.gatk_cmd(options.cmd, command_specific_args=command_specific_args), stderr=stderr, stdout=stdout))
108108

109109

bin/cmo_split_reads

+37-27
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,38 @@
22

33
import argparse, os, sys, signal, subprocess, math, gzip, io
44
import cmo
5+
import multiprocessing
6+
7+
def chunk(fastq, lines_per_chunk, num_pieces):
    """Split one gzipped FASTQ into at most ``num_pieces`` gzipped chunk files.

    Chunk files are written to the current working directory and named
    ``<prefix>.chunkNNN.fastq.gz``, where ``<prefix>`` is the basename of
    ``fastq`` up to its first dot and ``NNN`` is a zero-padded counter.

    Args:
        fastq: Path to the gzip-compressed FASTQ file to split.
        lines_per_chunk: Target number of lines per chunk. It is rounded up
            to the next multiple of four so that a four-line FASTQ record is
            never split across two files.
        num_pieces: Maximum number of chunk files (int or numeric string).
            The last chunk receives every remaining line.

    Returns:
        True if ``zcat`` decompressed the input successfully, False if it
        exited with a non-zero status (the failure is also logged).
    """
    logger = cmo.util.get_logger()
    output_prefix = os.path.basename(fastq).split(".", 1)[0] + "."
    # Closed-form round-up to a multiple of four; unlike an increment loop
    # this also terminates when handed a non-integral float.
    lines_per_chunk = int(math.ceil(lines_per_chunk / 4.0)) * 4
    last_index = int(num_pieces) - 1

    chunk_index = 0
    lines_in_chunk = 0
    pending = []
    ofh = _open_chunk_file(output_prefix, chunk_index, logger)
    # Decompress with an external zcat (faster than the gzip module for
    # reading); the argument list avoids shell interpretation of the path.
    zcat = subprocess.Popen(["zcat", fastq], stdout=subprocess.PIPE)
    try:
        for line in zcat.stdout:
            pending.append(line)
            lines_in_chunk += 1
            # Roll over to a new file only while more pieces are allowed;
            # once on the last piece, keep accumulating until input ends.
            if lines_in_chunk == lines_per_chunk and chunk_index < last_index:
                ofh.write(b"".join(pending))
                ofh.close()
                pending = []
                lines_in_chunk = 0
                chunk_index += 1
                ofh = _open_chunk_file(output_prefix, chunk_index, logger)
        ofh.write(b"".join(pending))
    finally:
        # Always release the output file and reap the child process, even
        # if reading or writing failed part-way through.
        ofh.close()
        zcat.stdout.close()
        zcat.wait()

    if zcat.returncode != 0:
        logger.critical("zcat exited with code %s while reading %s" % (zcat.returncode, fastq))
        return False
    return True


def _open_chunk_file(output_prefix, index, logger):
    """Open chunk file number ``index`` for writing and log its name."""
    filename = output_prefix + "chunk{:0>3d}".format(index) + ".fastq.gz"
    logger.info("Opening %s and writing reads..." % (filename))
    # Compression level 1: the chunks are real gzip (trimgalore insists on
    # the .gz name), but written with the fastest setting.
    return gzip.open(filename, "wb", 1)
36+
537

638
if __name__ =='__main__':
739
logger = cmo.util.get_logger()
@@ -19,35 +51,13 @@ if __name__ =='__main__':
1951
num_lines = sum(1 for line in os.popen("zcat " + fastqs[0]))
2052
lines_per_chunk = math.ceil(float(num_lines) / int(num_pieces))
2153
logger.info("%s lines per chunk" % str(lines_per_chunk))
54+
pool=multiprocessing.Pool(processes=2)
2255
for fastq in fastqs:
23-
output_prefix = os.path.basename(fastq).split(".", 1)[0] + "."
24-
while lines_per_chunk % 4 != 0:
25-
lines_per_chunk +=1
26-
fh = io.BufferedReader(gzip.open(fastq, "rb"))
27-
output_file_count = 0
28-
output_file_lines = 0
29-
#these aren't relaly gz but trimgalore doesnt like files not named gz...great work trimgalore
30-
filename = output_prefix + "chunk{:0>3d}".format(output_file_count) + ".fastq.gz"
31-
logger.info("Opening %s and writing reads..." % (filename))
32-
ofh = open(filename, "wb")
33-
lines = list()
34-
for line in os.popen("zcat " + fastq):
35-
lines.append(line)
36-
output_file_lines+=1
37-
if output_file_lines == lines_per_chunk:
38-
if(output_file_count < int(num_pieces)-1):
39-
output_file_lines=0
40-
ofh.write("".join(lines))
41-
ofh.close()
42-
lines = list()
43-
output_file_count +=1
44-
filename = output_prefix + "chunk{:0>3d}".format(output_file_count) + ".fastq.gz"
45-
logger.info("Opening %s and writing reads..." % (filename))
46-
ofh = open(filename, "wb")
47-
ofh.write("".join(lines))
48-
ofh.close()
56+
result = pool.apply_async(chunk, args=(fastq, lines_per_chunk, num_pieces, ))
57+
pool.close()
58+
pool.join()
4959

50-
60+
5161

5262

5363

cmo/_version.py

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
# version'. Distribution tarballs contain a pre-generated copy of this file.
44

55
__version__ = '1.4.0'
6+

cmo/util.py

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def call_cmd(cmd, shell=True, stderr=None, stdout=None, stdin=None):
114114
try:
115115
logger.info("EXECUTING: %s" % cmd)
116116
return_code = subprocess.check_call(cmd, shell=shell, stderr=stderr, stdout=stdout, stdin=stdin)
117+
return return_code
117118
except subprocess.CalledProcessError, e:
118119
logger.critical( "Non Zero Exit Code %s from %s" % (e.returncode, cmd))
119120
logger.critical("Bailing out!")

0 commit comments

Comments
 (0)