Skip to content

Commit cf44fe7

Browse files
authored
REFERENCE_SEQUENCE I BANISH THEE (#39)
* add 1.96 version to picard calls
* add set -o pipefail to bwa call per jaeyoung bug ticket, update version
* remove ref seq from default args, default_args probably needs another refactor
* fix dis test
1 parent 4741aa1 commit cf44fe7

File tree

5 files changed

+29
-14
lines changed

5 files changed

+29
-14
lines changed

bin/cmo_picard

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ if __name__ =='__main__':
4444
parser.add_argument("--CREATE_MD5_FILE", action="store_true")
4545
parser.add_argument("--CREATE_INDEX", action="store_true")
4646
list_of_args = ['TMP_DIR', 'VERBOSITY', 'VALIDATION_STRINGENCY', 'COMPRESSION_LEVEL', 'MAX_RECORDS_IN_RAM']
47-
if R_parameter == False:
48-
list_of_args.append('REFERENCE_SEQUENCE')
47+
#if R_parameter == False:
48+
# list_of_args.append('REFERENCE_SEQUENCE')
4949
for arg in list_of_args:
5050
parser.add_argument("--"+arg, action="store")
5151
cmo.util.add_logging_options(parser)

cmo/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# This file is originally generated from Git information by running 'setup.py
33
# version'. Distribution tarballs contain a pre-generated copy of this file.
44

5-
__version__ = '1.4.4'
5+
__version__ = '1.4.5'

cmo/gatk.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
import os, sys
1+
import os, sys, tempfile
22
from . import util
33
logger = util.get_logger()
44

55

66

77
class Gatk:
8-
def __init__(self,version="default", java_version="default", java_args="-Xmx48g -Xms256m -XX:-UseGCOverheadLimit -Djava.io.tmpdir=/scratch/", temp_dir="/scratch", mutect=False):
8+
def __init__(self,version="default", java_version="default", java_args="-Xmx48g -Xms256m -XX:-UseGCOverheadLimit", temp_dir="/scratch", mutect=False):
99
try:
1010
if mutect:
1111
self.gatk_jar=util.programs["mutect"][version]
@@ -27,7 +27,12 @@ def __init__(self,version="default", java_version="default", java_args="-Xmx48g
2727
def gatk_cmd(self, command, java_args_override=None, command_specific_args={}):
2828
cmd = [self.java_cmd, self.java_args]
2929
if(self.temp_dir != None):
30-
cmd = cmd + ["-Djava.io.tmpdir="+self.temp_dir]
30+
if os.path.exists(self.temp_dir):
31+
cmd = cmd + ["-Djava.io.tmpdir="+self.temp_dir]
32+
elif os.path.exists("/srv/data/scratch"):
33+
cmd = cmd + ["-Djava.io.tmpdir=/srv/data/scratch"]
34+
else:
35+
cmd = cmd + ["-Djava.io.tmpdir=" + tempfile.mkdtemp()]
3136
cmd = cmd + [ "-jar", self.gatk_jar, "-T",command]
3237
for arg, value in command_specific_args.items():
3338
if value != None:

cmo/picard.py

-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def __init__(self,version="default", java_version="default", java_args="-Xmx2g")
2727
"MAX_RECORDS_IN_RAM": "5000000",
2828
"CREATE_INDEX": "true",
2929
"CREATE_MD5_FILE": "false",
30-
"REFERENCE_SEQUENCE": "null",
3130
# "GA4GH_CLIENT_SECRETS":"null",
3231
}
3332

@@ -49,11 +48,6 @@ def picard_cmd(self, command, default_args_override={}, command_specific_args={}
4948
cmd = cmd + [arg + "="+ str(value)]
5049
elif value != None and value !=False:
5150
cmd = cmd + [arg + "=" + value]
52-
elif command_specific_args[arg] == None or command_specific_args[arg] == False:
53-
if value==True:
54-
cmd = cmd + [arg + "="+ str(value)]
55-
elif value != None and value !=False:
56-
cmd = cmd + [arg + "=" + value]
5751
for arg, value in command_specific_args.items():
5852
if(isinstance(value, list)):
5953
for arg_value in value:

test/test_production_commands.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def setup_module():
3030
global TEST_TEMP_DIR
3131
if os.path.exists("/scratch"):
3232
TEST_TEMP_DIR = tempfile.mkdtemp(dir="/scratch");
33+
elif os.path.exists("/srv/data/scratch"):
34+
TEST_TEMP_DIR = tempfile.mkdtemp(dir="/srv/data/scratch");
3335
else:
3436
TEST_TEMP_DIR = tempfile.mkdtemp();
3537
global output
@@ -51,6 +53,8 @@ def test_abra():
5153
'--reference_sequence', genome_string,
5254
'--targets', input_bed,
5355
'--working', abratmpdir]
56+
print " ".join(cmd)
57+
5458
prog_output = subprocess.check_output(" ".join(cmd), shell=True)
5559
#check prog_output to see if it picked up the arguments we gave...
5660
assert_true(re.search("input0: /ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam", prog_output))
@@ -106,6 +110,7 @@ def test_printreads():
106110
'--num_cpu_threads_per_data_thread', '6',
107111
'--out', output,
108112
'--reference_sequence', genome_string]
113+
print " ".join(cmd)
109114
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
110115
assert_true(re.search("INFO .* HelpFormatter - Program Args: -T PrintReads --input_file /ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam --num_cpu_threads_per_data_thread 6 --BQSR /ifs/work/charris/testdata_for_cmo/recal.matrix --reference_sequence /ifs/depot/assemblies/H.sapiens/b37/b37.fasta --out", prog_output))
111116

@@ -125,6 +130,7 @@ def test_baserecal():
125130
'--java_args', "'-Xmx48g -Xms256m -XX:-UseGCOverheadLimit'",
126131
'--out', output,
127132
'--reference_sequence', genome_string]
133+
print " ".join(cmd)
128134
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
129135
assert_true(re.search("INFO .* HelpFormatter - Program Args: -T BaseRecalibrator --input_file /ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam --reference_sequence /ifs/depot/assemblies/H.sapiens/b37/b37.fasta --knownSites /ifs/work/charris/temp_depot/dbsnp_138.b37.excluding_sites_after_129.vcf --knownSites /ifs/work/charris/temp_depot/hapmap_3.3.b37.vcf --knownSites /ifs/work/charris/temp_depot/1000G_phase1.snps.high_confidence.b37.vcf --knownSites /ifs/work/charris/temp_depot/Mills_and_1000G_gold_standard.indels.b37.vcf --covariate ContextCovariate --covariate CycleCovariate --covariate ReadGroupCovariate --covariate QualityScoreCovariate --out", prog_output))
130136

@@ -143,6 +149,7 @@ def test_addorreplacereadgroups():
143149
'--SM', 'P-0000377-T02-IM3',
144150
'--SO', 'coordinate',
145151
'--TMP_DIR', tmpdir]
152+
print " ".join(cmd)
146153
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
147154
print prog_output
148155
assert_true(re.search("picard.sam.AddOrReplaceReadGroups INPUT=/ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam OUTPUT=.* SORT_ORDER=coordinate RGID=P-0000377 RGLB=5 RGPL=Illumina RGPU=bc26 RGSM=P-0000377-T02-IM3 RGCN=MSKCC TMP_DIR=\["+current_dir+"\] CREATE_INDEX=true VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false", prog_output))
@@ -162,6 +169,7 @@ def test_trimgalore():
162169
fastq1,
163170
fastq2,
164171
]
172+
print " ".join(cmd)
165173
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
166174
assert_true("/opt/common/CentOS_6/trim_galore/Trim_Galore_v0.2.5/trim_galore --adapter AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATGAGCATCTCGTATGCCGTCTTCTGCTTG --suppress_warn --paired --length 25 --gzip --quality 1 --adapter2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT /ifs/work/charris/testdata_for_cmo/P1_R1.fastq.gz /ifs/work/charris/testdata_for_cmo/P1_R2.fastq.gz", prog_output)
167175

@@ -186,6 +194,7 @@ def test_vardict():
186194
'-x', '2000',
187195
'-z', '1',
188196
input_bed ]
197+
print " ".join(cmd)
189198
subprocess.check_call(" ".join(cmd), shell=True)
190199

191200

@@ -211,6 +220,7 @@ def test_somaticindeldetector():
211220
'--read_filter', 'UnmappedRead',
212221
'--read_filter', 'MappingQuality',
213222
'--read_filter', 'BadCigar']
223+
print " ".join(cmd)
214224
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
215225
assert_true(re.search("INFO .* HelpFormatter - Program Args: -T SomaticIndelDetector --input_file:normal /ifs/work/charris/testdata_for_cmo/P2_ADDRG_MD.abra.fmi.printreads.bam --input_file:tumor /ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam --min_mapping_quality_score 20 --intervals /ifs/work/charris/testdata_for_cmo/intervals.bed --filter_expressions T_COV<10||N_COV<4||T_INDEL_F<0.0001||T_INDEL_CF<0.7 --maxNumberOfReads 100000 --verboseOutput .* --read_filter DuplicateRead --read_filter FailsVendorQualityCheck --read_filter NotPrimaryAlignment --read_filter BadMate --read_filter MappingQualityUnavailable --read_filter UnmappedRead --read_filter MappingQuality --read_filter BadCigar --reference_sequence /ifs/depot/assemblies/H.sapiens/b37/b37.fasta --out .*", prog_output))
216226

@@ -223,6 +233,7 @@ def test_findcoveredintervals():
223233
'--out',output,
224234
'--reference_sequence', genome_string,
225235
'--input_file',normal_bam]
236+
print " ".join(cmd)
226237
prog_output = subprocess.check_output(" ".join(cmd),shell=True, stderr=subprocess.STDOUT)
227238
assert_true(re.search("INFO .* HelpFormatter - Program Args: -T FindCoveredIntervals --input_file /ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam --input_file /ifs/work/charris/testdata_for_cmo/P2_ADDRG_MD.abra.fmi.printreads.bam --out .* --reference_sequence /ifs/depot/assemblies/H.sapiens/b37/b37.fasta",prog_output))
228239

@@ -234,6 +245,7 @@ def test_pindel():
234245
'--output-prefix','Tumor',
235246
'--sample_names','"Normal Tumor"']
236247

248+
print " ".join(cmd)
237249
prog_output = subprocess.check_output(" ".join(cmd),shell=True, stderr=subprocess.STDOUT)
238250
#print prog_output
239251
assert_true(re.search(tumor_bam,prog_output))
@@ -247,6 +259,7 @@ def test_index():
247259
cmd = ['cmo_index',
248260
'--normal',normal_bam,
249261
'--tumor',tumor_bam]
262+
print " ".join(cmd)
250263
prog_output = subprocess.check_output(" ".join(cmd),shell=True,stderr=subprocess.STDOUT)
251264
assert_true(re.search(normal_bam,prog_output))
252265
assert_true(re.search(tumor_bam,prog_output))
@@ -261,18 +274,21 @@ def test_markduplicates():
261274
'--O', output,
262275
'--TMP_DIR', tmpdir
263276
]
277+
print " ".join(cmd)
264278
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
265-
assert_true(re.search("picard.sam.markduplicates.MarkDuplicates INPUT=\[/ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam\] OUTPUT=.* METRICS_FILE=.* TMP_DIR=\["+current_dir+"\] CREATE_INDEX=true MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 SORTING_COLLECTION_SIZE_RATIO=0.25 PROGRAM_RECORD_ID=MarkDuplicates PROGRAM_GROUP_NAME=MarkDuplicates REMOVE_DUPLICATES=false ASSUME_SORTED=false DUPLICATE_SCORING_STRATEGY=SUM_OF_BASE_QUALITIES READ_NAME_REGEX=\[a-zA-Z0-9\]\+:\[0-9\]:\(\[0-9\]\+\):\(\[0-9\]\+\):\(\[0-9\]\+\).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_MD5_FILE=false", prog_output))
279+
print prog_output
280+
assert_true(re.search("INPUT=\[/ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam\] OUTPUT=.* METRICS_FILE=.* TMP_DIR=.* CREATE_INDEX=true.*", prog_output))
266281

267282
def test_fixmateinformation():
268283
cmd = ['cmo_picard',
269284
'--version 1.96',
270285
'--cmd', 'FixMateInformation',
271286
'--I', tumor_bam,
272287
'--O', output]
288+
print cmd
273289
prog_output = subprocess.check_output(" ".join(cmd), shell=True, stderr=subprocess.STDOUT)
274290
print prog_output
275-
assert_true(re.search("picard.sam.FixMateInformation INPUT=\[/ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam\] OUTPUT=.* ASSUME_SORTED=false ADD_MATE_CIGAR=true VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false", prog_output))
291+
assert_true(re.search("INPUT=\[/ifs/work/charris/testdata_for_cmo/P1_ADDRG_MD.abra.fmi.printreads.bam\] OUTPUT=.* VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false", prog_output))
276292

277293

278294

0 commit comments

Comments
 (0)