Skip to content

Commit d82f750

Browse files
committed
Increase chunk size in split_reads; support GRCh37_mm10 in vcf2maf
1 parent a060c34 commit d82f750

File tree

2 files changed

+11
-20
lines changed

2 files changed

+11
-20
lines changed

bin/cmo_split_reads

+6-18
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ def chunk(fastq, platform_unit, lines_per_chunk, num_pieces):
1313
output_prefix = "-".join(exploded)
1414
while lines_per_chunk % 4 != 0:
1515
lines_per_chunk +=1
16-
fh = io.BufferedReader(gzip.open(fastq, "rb"))
1716
output_file_count = 0
1817
output_file_lines = 0
1918
#these aren't really gz but trimgalore doesn't like files not named gz...great work trimgalore
@@ -24,17 +23,19 @@ def chunk(fastq, platform_unit, lines_per_chunk, num_pieces):
2423
for line in os.popen("zcat < " + fastq):
2524
lines.append(line)
2625
output_file_lines+=1
27-
if output_file_lines == lines_per_chunk:
26+
if output_file_lines == lines_per_chunk and int(num_pieces) > 1:
2827
if(output_file_count < int(num_pieces)-1):
2928
output_file_lines=0
30-
ofh.write("".join(lines))
29+
for l in lines:
30+
ofh.write(l)
3131
ofh.close()
3232
lines = list()
3333
output_file_count +=1
3434
filename = output_prefix + "chunk{:0>3d}".format(output_file_count) + ".fastq.gz"
3535
logger.info("Opening %s and writing reads..." % (filename))
3636
ofh = gzip.open(filename, "wb", 1)
37-
ofh.write("".join(lines))
37+
for l in lines:
38+
ofh.write(l)
3839
ofh.close()
3940
return True
4041

@@ -52,7 +53,7 @@ if __name__ =='__main__':
5253
fastqs.append(args.fastq2)
5354
filesize = os.path.getsize(fastqs[0])
5455
logger.info("Fastq1 Filesize: %sGB" % ("{:.2f}".format(float(filesize)/1000000000)))
55-
num_pieces = math.ceil(float(filesize)/350000000)
56+
num_pieces = math.ceil(float(filesize)/2800000000)
5657
logger.info("Splitting into %s pieces" % "{:.0f}".format(num_pieces))
5758
num_lines = sum(1 for line in os.popen("zcat " + fastqs[0]))
5859
lines_per_chunk = math.ceil(float(num_lines) / int(num_pieces))
@@ -62,16 +63,3 @@ if __name__ =='__main__':
6263
result = pool.apply_async(chunk, args=(fastq, args.platform_unit, lines_per_chunk, num_pieces, ))
6364
pool.close()
6465
pool.join()
65-
66-
67-
68-
69-
70-
71-
72-
73-
74-
75-
76-
77-

bin/cmo_vcf2maf

+5-2
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ if __name__ =='__main__':
2828
preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
2929
preparser.add_argument("--vep-release", help="Version of VEP and its cache to use", choices=cmo.util.programs['vep'].keys(), default="default")
3030
preparser.add_argument("--species", help="Species of variants in input", choices=["homo_sapiens","mus_musculus"], default="homo_sapiens")
31-
preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
31+
preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38","GRCh37_mm10"], default="GRCh37")
3232
options, _ = preparser.parse_known_args()
33-
33+
if options.ncbi_build == 'GRCh37_mm10':
34+
options.ncbi_build = 'GRCh37'
3435
# Figure out the path to the actual Perl script that this Python wrapper will run
3536
script_path = cmo.util.programs['vcf2maf'][options.version] + "vcf2maf.pl"
3637
# Extract arguments and their defaults, by parsing the --help output
@@ -62,6 +63,8 @@ if __name__ =='__main__':
6263

6364
# Now run the argparse instance, which will parse and execute, or print help text if requested
6465
args = parser.parse_args()
66+
if args.ncbi_build == 'GRCh37_mm10':
67+
args.ncbi_build = 'GRCh37'
6568
args_dict = vars(args)
6669

6770
# Unless user-defined, assume that sample IDs are the same in VCF and MAF

0 commit comments

Comments
 (0)