Skip to content

Commit 0f56eba

Browse files
committed
Merge branch 'master' of https://github.com/mskcc/cmo
2 parents d602556 + 61de7b1 commit 0f56eba

File tree

4 files changed

+155
-34
lines changed

4 files changed

+155
-34
lines changed

bin/cmo_maf2maf

+20-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
22

3-
import argparse, os, sys, re, subprocess, itertools, glob, tempfile, shutil
3+
import argparse, os, sys, re, subprocess, itertools, glob, getpass, tempfile, shutil
44
from operator import attrgetter
55
import textwrap as _textwrap
66
import cmo
@@ -40,8 +40,8 @@ if __name__ =='__main__':
4040
defaults_dict['--custom-enst'] = cmo.util.programs['vcf2maf'][options.version] + "data/isoform_overrides_at_mskcc"
4141
defaults_dict['--filter-vcf'] = cmo.util.genomes['GRCh37']['exac']
4242
defaults_dict['--retain-cols'] = 'Center,Verification_Status,Validation_Status,Mutation_Status,Sequencing_Phase,Sequence_Source,Validation_Method,Score,BAM_file,Sequencer,Tumor_Sample_UUID,Matched_Norm_Sample_UUID,Caller'
43-
tmp_dir = tempfile.mkdtemp(dir='/scratch') if os.path.exists('/scratch') else tempfile.mkdtemp(dir='/tmp');
44-
defaults_dict['--tmp-dir'] = tmp_dir
43+
tmp_root = "/scratch/<username>/..."
44+
defaults_dict['--tmp-dir'] = tmp_root
4545

4646
# With arguments and defaults set, let's construct an argparse instance
4747
parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
@@ -53,12 +53,25 @@ if __name__ =='__main__':
5353
parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
5454
else:
5555
parser.add_argument(arg, action="store", metavar='', help=description)
56-
cmo.util.add_logging_options(parser)
5756

5857
# Now run the argparse instance, which will parse and execute, or print help text if requested
5958
args = parser.parse_args()
6059
args_dict = vars(args)
6160

61+
# If user didn't define their own --tmp-dir, let's create one for them under /scratch/username
62+
if args_dict['tmp_dir'] == tmp_root:
63+
# Create a subdirectory under /scratch with the username, if it doesn't already exist
64+
tmp_root = "/scratch/" + getpass.getuser()
65+
if not os.path.exists(tmp_root):
66+
os.makedirs(tmp_root)
67+
# For machines without writable /scratch, default to creating a temp folder under /tmp
68+
tmp_dir = tempfile.mkdtemp(dir=tmp_root) if os.path.exists(tmp_root) else tempfile.mkdtemp(dir='/tmp')
69+
args_dict['tmp_dir'] = tmp_dir
70+
# Show the user a warning about limited storage in temp directories
71+
sys.stderr.write( "WARNING: Writing temporary files to " + tmp_dir + " which could fill " +
72+
"up and interrupt your colleagues' work. If you're working with giant files, then " +
73+
"please define your own --tmp-dir, or we're gonna get ya!\n" )
74+
6275
# Locate VEP and its cache, the reference FASTA, and the VCF used for filtering
6376
vep_dir = cmo.util.programs['vep'][args.vep_release]
6477
args_dict['vep_data'] = vep_dir
@@ -75,16 +88,13 @@ if __name__ =='__main__':
7588

7689
# Build the command we're going to run
7790
cmd = [cmo.util.programs['perl']['default'], script_path]
78-
stderr = args.stderr
79-
stdout = args.stdout
80-
# Trim out arguments without values, and also any args that might mess with our logging
91+
# Trim out arguments without values
8192
args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
82-
cmo.util.remove_logging_options_from_dict(args_dict)
8393

8494
# Make sure the arguments are in a format that the script will accept, and kick it off
8595
for arg, value in args_dict.items():
8696
arg = arg.replace("_","-")
8797
cmd = cmd + ["--"+arg, value]
88-
sys.stderr.write( "Running: " + " ".join( cmd ) + "\n" )
89-
cmo.util.call_cmd( " ".join( cmd ), stdout=stdout, stderr=stderr )
98+
sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
99+
cmo.util.call_cmd( " ".join( cmd ))
90100
shutil.rmtree(tmp_dir)

bin/cmo_maf2vcf

+46-14
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,69 @@
11
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
22

33
import argparse, os, sys, re, subprocess, itertools, glob
4+
from operator import attrgetter
5+
import textwrap as _textwrap
46
import cmo
57

8+
# Custom help formatter to display args in alphabetical order, and fitted line wrap for sphinx
9+
class SortingHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that lists options alphabetically and wraps help at a fixed width.

    Inherits the "(default: ...)" suffix behaviour of
    ``argparse.ArgumentDefaultsHelpFormatter``. The fixed wrap width exists so
    the generated help fits the layout used when it is rendered by sphinx.
    """

    # Column at which help text is wrapped, regardless of the width argparse
    # computes. Subclasses may override this; the default keeps the historical 78.
    wrap_width = 78

    def add_arguments(self, actions):
        """Add ``actions`` to the help output, ordered by their option strings."""
        # sorted() builds a new list, so the caller's sequence is left untouched.
        actions = sorted(actions, key=attrgetter('option_strings'))
        super(SortingHelpFormatter, self).add_arguments(actions)

    def _split_lines(self, text, width):
        """Collapse runs of whitespace in ``text`` and wrap it at ``wrap_width``.

        ``width`` is accepted because argparse passes it, but it is deliberately
        ignored in favour of the fixed ``wrap_width``.
        """
        text = self._whitespace_matcher.sub(' ', text).strip()
        return _textwrap.wrap(text, self.wrap_width)
16+
17+
# Function that runs --help on the tool we've wrapped, and extracts documentation
618
def parse_script_help(script_path):
719
perl = cmo.util.programs['perl']['default']
8-
help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]),stdout=subprocess.PIPE,shell=True).communicate()[0]
9-
valid_args = re.findall(r"\s+(--[\S_]+)\s+([\S \t]+)\n?", help_text, re.M)
10-
return dict(valid_args)
20+
help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]), stdout=subprocess.PIPE, shell=True).communicate()[0]
21+
valid_args = re.findall(r"^\s*(--\S+)\s+([^\[\n]+)", help_text, re.M)
22+
defaults = re.findall(r"^\s*(--\S+)\s+[\S ]+\[([\S ]+)\]$", help_text, re.M)
23+
return dict(valid_args), dict(defaults)
1124

1225
if __name__ =='__main__':
1326
# We'll first need to figure out which version to run with "-h" to parse the help text
14-
preparser = argparse.ArgumentParser(description="run maf2vcf", add_help=False)
15-
preparser.add_argument("--version", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
27+
preparser = argparse.ArgumentParser(description="Run maf2vcf", add_help=False, formatter_class=SortingHelpFormatter)
28+
preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
29+
preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
1630
options, _ = preparser.parse_known_args()
31+
32+
# Figure out the path to the actual Perl script that this Python wrapper will run
1733
script_path = cmo.util.programs['vcf2maf'][options.version] + "maf2vcf.pl"
18-
args_dict = parse_script_help(script_path)
19-
parser = argparse.ArgumentParser(parents = [preparser], add_help=True)
34+
# Extract arguments and their defaults, by parsing the --help output
35+
args_dict, defaults_dict = parse_script_help(script_path)
36+
37+
# With arguments and defaults set, let's construct an argparse instance
38+
parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
2039
for arg, description in args_dict.items():
21-
if arg == "--help":
40+
# Hide a few arguments from the user, because we'll determine them ourselves
41+
if arg in ["--help","--man","--ref-fasta"]:
2242
continue
23-
parser.add_argument(arg,action="store", metavar='', help=description)
24-
cmo.util.add_logging_options(parser)
43+
if arg in defaults_dict and arg not in ["--output-maf"]:
44+
parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
45+
else:
46+
parser.add_argument(arg, action="store", metavar='', help=description)
47+
48+
# Now run the argparse instance, which will parse and execute, or print help text if requested
2549
args = parser.parse_args()
2650
args_dict = vars(args)
51+
52+
# Locate the reference for this genome build
53+
args_dict['ref_fasta'] = cmo.util.genomes[args.ncbi_build]['fasta']
54+
55+
# Remove arguments that the actual wrapped tool won't recognize
2756
for key in ["version"]:
2857
del args_dict[key]
58+
59+
# Build the command we're going to run
2960
cmd = [cmo.util.programs['perl']['default'], script_path]
30-
stderr = args.stderr
31-
stdout = args.stdout
61+
# Trim out arguments without values
3262
args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
33-
cmo.util.remove_logging_options_from_dict(args_dict)
63+
64+
# Make sure the arguments are in a format that the script will accept, and kick it off
3465
for arg, value in args_dict.items():
3566
arg = arg.replace("_","-")
3667
cmd = cmd + ["--"+arg, value]
37-
cmo.util.call_cmd(" ".join(cmd), stdout=stdout, stderr=stderr)
68+
sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
69+
cmo.util.call_cmd( " ".join( cmd ))

bin/cmo_vcf2maf

+20-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
22

3-
import argparse, os, sys, re, subprocess, itertools, glob, tempfile, shutil
3+
import argparse, os, sys, re, subprocess, itertools, glob, getpass, tempfile, shutil
44
from operator import attrgetter
55
import textwrap as _textwrap
66
import cmo
@@ -42,8 +42,8 @@ if __name__ =='__main__':
4242
defaults_dict['--maf-center'] = 'mskcc.org'
4343
defaults_dict['--vcf-tumor-id'] = defaults_dict['--tumor-id']
4444
defaults_dict['--vcf-normal-id'] = defaults_dict['--normal-id']
45-
tmp_dir = tempfile.mkdtemp(dir='/scratch') if os.path.exists('/scratch') else tempfile.mkdtemp(dir='/tmp');
46-
defaults_dict['--tmp-dir'] = tmp_dir
45+
tmp_root = "/scratch/<username>/..."
46+
defaults_dict['--tmp-dir'] = tmp_root
4747

4848
# With arguments and defaults set, let's construct an argparse instance
4949
parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
@@ -55,12 +55,25 @@ if __name__ =='__main__':
5555
parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
5656
else:
5757
parser.add_argument(arg, action="store", metavar='', help=description)
58-
cmo.util.add_logging_options(parser)
5958

6059
# Now run the argparse instance, which will parse and execute, or print help text if requested
6160
args = parser.parse_args()
6261
args_dict = vars(args)
6362

63+
# If user didn't define their own --tmp-dir, let's create one for them under /scratch/username
64+
if args_dict['tmp_dir'] == tmp_root:
65+
# Create a subdirectory under /scratch with the username, if it doesn't already exist
66+
tmp_root = "/scratch/" + getpass.getuser()
67+
if not os.path.exists(tmp_root):
68+
os.makedirs(tmp_root)
69+
# For machines without writable /scratch, default to creating a temp folder under /tmp
70+
tmp_dir = tempfile.mkdtemp(dir=tmp_root) if os.path.exists(tmp_root) else tempfile.mkdtemp(dir='/tmp')
71+
args_dict['tmp_dir'] = tmp_dir
72+
# Show the user a warning about limited storage in temp directories
73+
sys.stderr.write( "WARNING: Writing temporary files to " + tmp_dir + " which could fill " +
74+
"up and interrupt your colleagues' work. If you're working with giant files, then " +
75+
"please define your own --tmp-dir, or we're gonna get ya!\n" )
76+
6477
# Locate VEP and its cache, the reference FASTA, and the VCF used for filtering
6578
vep_dir = cmo.util.programs['vep'][args.vep_release]
6679
args_dict['vep_data'] = vep_dir
@@ -77,16 +90,13 @@ if __name__ =='__main__':
7790

7891
# Build the command we're going to run
7992
cmd = [cmo.util.programs['perl']['default'], script_path]
80-
stderr = args.stderr
81-
stdout = args.stdout
82-
# Trim out arguments without values, and also any args that might mess with our logging
93+
# Trim out arguments without values
8394
args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
84-
cmo.util.remove_logging_options_from_dict(args_dict)
8595

8696
# Make sure the arguments are in a format that the script will accept, and kick it off
8797
for arg, value in args_dict.items():
8898
arg = arg.replace("_","-")
8999
cmd = cmd + ["--"+arg, value]
90-
sys.stderr.write( "Running: " + " ".join( cmd ) + "\n" )
91-
cmo.util.call_cmd( " ".join( cmd ), stdout=stdout, stderr=stderr )
100+
sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
101+
cmo.util.call_cmd( " ".join( cmd ))
92102
shutil.rmtree(tmp_dir)

bin/cmo_vcf2vcf

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
2+
3+
import argparse, os, sys, re, subprocess, itertools, glob
4+
from operator import attrgetter
5+
import textwrap as _textwrap
6+
import cmo
7+
8+
# Custom help formatter to display args in alphabetical order, and fitted line wrap for sphinx
9+
class SortingHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that lists options alphabetically and wraps help at a fixed width.

    Inherits the "(default: ...)" suffix behaviour of
    ``argparse.ArgumentDefaultsHelpFormatter``. The fixed wrap width exists so
    the generated help fits the layout used when it is rendered by sphinx.
    """

    # Column at which help text is wrapped, regardless of the width argparse
    # computes. Subclasses may override this; the default keeps the historical 78.
    wrap_width = 78

    def add_arguments(self, actions):
        """Add ``actions`` to the help output, ordered by their option strings."""
        # sorted() builds a new list, so the caller's sequence is left untouched.
        actions = sorted(actions, key=attrgetter('option_strings'))
        super(SortingHelpFormatter, self).add_arguments(actions)

    def _split_lines(self, text, width):
        """Collapse runs of whitespace in ``text`` and wrap it at ``wrap_width``.

        ``width`` is accepted because argparse passes it, but it is deliberately
        ignored in favour of the fixed ``wrap_width``.
        """
        text = self._whitespace_matcher.sub(' ', text).strip()
        return _textwrap.wrap(text, self.wrap_width)
16+
17+
# Function that runs --help on the tool we've wrapped, and extracts documentation
18+
def parse_script_help(script_path):
    """Run the wrapped Perl script with ``-h`` and scrape its option documentation.

    Returns a pair of dicts keyed by option name (e.g. ``"--input-vcf"``):
    the first maps each option to the text that follows it on its help line,
    up to the first ``[`` or the end of the line; the second maps options
    whose help line ends in ``[...]`` to the text inside those brackets.
    """
    # The command is assembled as a single string because it is run via the shell.
    help_cmd = " ".join([cmo.util.programs['perl']['default'], script_path, "-h"])
    proc = subprocess.Popen(help_cmd, stdout=subprocess.PIPE, shell=True)
    help_text, _ = proc.communicate()

    # Option name followed by its description (stops before any "[default]").
    described = re.findall(r"^\s*(--\S+)\s+([^\[\n]+)", help_text, re.M)
    # Option name plus the bracketed value that terminates the same line.
    with_defaults = re.findall(r"^\s*(--\S+)\s+[\S ]+\[([\S ]+)\]$", help_text, re.M)
    return dict(described), dict(with_defaults)
24+
25+
if __name__ =='__main__':
    # Stage 1: a minimal parser that only knows which tool version and genome
    # build were requested, so the right script can be asked for its help text.
    preparser = argparse.ArgumentParser(description="Run vcf2vcf", add_help=False, formatter_class=SortingHelpFormatter)
    preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
    preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
    options, _ = preparser.parse_known_args()

    # Path of the Perl script this wrapper delegates to
    script_path = cmo.util.programs['vcf2maf'][options.version] + "vcf2vcf.pl"
    # Option descriptions and defaults, scraped from the script's own help output
    args_dict, defaults_dict = parse_script_help(script_path)

    # Stage 2: the full parser, mirroring the options of the wrapped script
    parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
    for arg, description in args_dict.items():
        # These are supplied by argparse or worked out below, so hide them from the user
        if arg in ["--help","--man","--ref-fasta"]:
            continue
        option_kwargs = dict(action="store", metavar='', help=description)
        # Only carry a scraped default over when there is one (never for --output-maf)
        if arg in defaults_dict and arg not in ["--output-maf"]:
            option_kwargs['default'] = defaults_dict[arg]
        parser.add_argument(arg, **option_kwargs)

    # Parse the real command line; this also handles --help
    args = parser.parse_args()
    args_dict = vars(args)

    # The reference FASTA is looked up from the chosen genome build
    args_dict['ref_fasta'] = cmo.util.genomes[args.ncbi_build]['fasta']

    # "version" is a wrapper-only option that the Perl script would not recognize
    del args_dict["version"]

    # Start the command line with the interpreter and the script
    cmd = [cmo.util.programs['perl']['default'], script_path]
    # Keep only the options that actually carry a value
    args_dict = dict((name, value) for name, value in args_dict.iteritems() if value)

    # argparse stores dests with underscores; the script expects dashed long options
    for name, value in args_dict.items():
        cmd = cmd + ["--" + name.replace("_","-"), value]

    command_line = " ".join( cmd )
    sys.stderr.write( "RUNNING: " + command_line + "\n" )
    cmo.util.call_cmd( command_line )

0 commit comments

Comments
 (0)