Skip to content

Commit 2dfaa5b

Browse files
authored
Minor update, deploy fillout, workflow.py time delay, split_reads gzip tweaks for improved speed (#25)
* helper script to index at start of module3..could be used for all modules * increment version for cmo_index, etc * initial commit of cmo_index * logging * cmo_split_reads now opens and emits gzips..slow because it uses native Python gzip instead of subprocess zcat * add logger function * use BufferedReader for speed improvement on gzip * increment version * version increment AGAIN * install cmo_fillout * longer delay for polling * speed improvement for split_reads...hopefully... * increment version for release
1 parent aa6f24f commit 2dfaa5b

File tree

4 files changed

+12
-9
lines changed

4 files changed

+12
-9
lines changed

bin/cmo_split_reads

+7-6
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ if __name__ =='__main__':
1616
logger.info("Fastq1 Filesize: %sGB" % ("{:.2f}".format(float(filesize)/1000000000)))
1717
num_pieces = math.ceil(float(filesize)/350000000)
1818
logger.info("Splitting into %s pieces" % "{:.0f}".format(num_pieces))
19-
num_lines = sum(1 for line in io.BufferedReader(gzip.open(fastqs[0])))
19+
num_lines = sum(1 for line in os.popen("zcat " + fastqs[0]))
2020
lines_per_chunk = math.ceil(float(num_lines) / int(num_pieces))
2121
logger.info("%s lines per chunk" % str(lines_per_chunk))
2222
for fastq in fastqs:
@@ -29,20 +29,21 @@ if __name__ =='__main__':
2929
filename = output_prefix + "chunk{:0>3d}".format(output_file_count) + ".fastq.gz"
3030
logger.info("Opening %s and writing reads..." % (filename))
3131
ofh = io.BufferedWriter(gzip.open(filename, "wb"))
32-
while(1):
33-
line = fh.readline()
34-
if not line:
35-
break;
36-
ofh.write(line)
32+
lines = list()
33+
for line in os.popen("zcat " + fastq):
34+
lines.append(line)
3735
output_file_lines+=1
3836
if output_file_lines == lines_per_chunk:
3937
if(output_file_count < int(num_pieces)-1):
4038
output_file_lines=0
39+
ofh.write("".join(lines))
4140
ofh.close()
41+
lines = list()
4242
output_file_count +=1
4343
filename = output_prefix + "chunk{:0>3d}".format(output_file_count) + ".fastq.gz"
4444
logger.info("Opening %s and writing reads..." % (filename))
4545
ofh = io.BufferedWriter(gzip.open(filename, "wb"))
46+
ofh.write("".join(lines))
4647
ofh.close()
4748

4849

cmo/_version.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
# This file is originally generated from Git information by running 'setup.py
33
# version'. Distribution tarballs contain a pre-generated copy of this file.
44

5-
__version__ = '1.3.0'
5+
6+
__version__ = '1.3.1'

cmo/workflow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def watcher_daemon(self, log_file):
163163
launcher_log_dir = os.path.join(FW_WFLOW_LAUNCH_LOC, getpass.getuser(), "")
164164
queue_launcher.rapidfire(self.launchpad, fireworks.FWorker(name="LSF"), common_adapter, reserve=True, nlaunches=0, launch_dir=launcher_log_dir, sleep_time=10, njobs_queue=500)
165165
failed_fws = []
166-
time.sleep(10)
166+
time.sleep(50)
167167
# offline_runs = self.launchpad.offline_runs.find({"completed": False, "deprecated": False}, {"launch_id": 1}).count()
168168
# self.launchpad.m_logger.info("%s offline runs found" % offline_runs)
169169
ready_lsf_jobs = self.launchpad.fireworks.find({"state":"READY", "spec._fworker" : "LSF"}).count()

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ def get_version():
115115
'bin/cmo_list2bed',
116116
'bin/cmo_pindel',
117117
'bin/cmo_bcftools',
118-
'bin/cmo_index'
118+
'bin/cmo_index',
119+
'bin/cmo_fillout'
119120
#'bin/cmo_hotspot3d'
120121
],
121122
zip_safe=False)

0 commit comments

Comments
 (0)