Merge branch 'master' of https://github.com/mskcc/cmo

lordzappo · lordzappo · commit 0f56ebaf641a · 2017-04-04T08:51:16.000-04:00
diff --git a/bin/cmo_maf2maf b/bin/cmo_maf2maf
@@ -1,6 +1,6 @@
 #!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
 
-import argparse, os, sys, re, subprocess, itertools, glob, tempfile, shutil
+import argparse, os, sys, re, subprocess, itertools, glob, getpass, tempfile, shutil
 from operator import attrgetter
 import textwrap as _textwrap
 import cmo
@@ -40,8 +40,8 @@ if __name__ =='__main__':
     defaults_dict['--custom-enst'] = cmo.util.programs['vcf2maf'][options.version] + "data/isoform_overrides_at_mskcc"
     defaults_dict['--filter-vcf'] = cmo.util.genomes['GRCh37']['exac']
     defaults_dict['--retain-cols'] = 'Center,Verification_Status,Validation_Status,Mutation_Status,Sequencing_Phase,Sequence_Source,Validation_Method,Score,BAM_file,Sequencer,Tumor_Sample_UUID,Matched_Norm_Sample_UUID,Caller'
-    tmp_dir = tempfile.mkdtemp(dir='/scratch') if os.path.exists('/scratch') else tempfile.mkdtemp(dir='/tmp');
-    defaults_dict['--tmp-dir'] = tmp_dir
+    tmp_root = "/scratch/<username>/..."
+    defaults_dict['--tmp-dir'] = tmp_root
 
     # With arguments and defaults set, let's construct an argparse instance
     parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
@@ -53,12 +53,25 @@ if __name__ =='__main__':
             parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
         else:
             parser.add_argument(arg, action="store", metavar='', help=description)
-    cmo.util.add_logging_options(parser)
 
     # Now run the argparse instance, which will parse and execute, or print help text if requested
     args = parser.parse_args()
     args_dict = vars(args)
 
+    # If user didn't define their own --tmp-dir, let's create one for them under /scratch/username
+    if args_dict['tmp_dir'] == tmp_root:
+        # Create a subdirectory under /scratch with the username, if it doesn't already exist
+        tmp_root = "/scratch/" + getpass.getuser()
+        if not os.path.exists(tmp_root):
+            os.makedirs(tmp_root)
+        # For machines without writable /scratch, default to creating a temp folder under /tmp
+        tmp_dir = tempfile.mkdtemp(dir=tmp_root) if os.path.exists(tmp_root) else tempfile.mkdtemp(dir='/tmp')
+        args_dict['tmp_dir'] = tmp_dir
+        # Show the user a warning about limited storage in temp directories
+        sys.stderr.write( "WARNING: Writing temporary files to " + tmp_dir + " which could fill " + 
+            "up and interrupt your colleagues' work. If you're working with giant files, then " +
+            "please define your own --tmp-dir, or we're gonna get ya!\n" )
+
     # Locate VEP and its cache, the reference FASTA, and the VCF used for filtering
     vep_dir = cmo.util.programs['vep'][args.vep_release]
     args_dict['vep_data'] = vep_dir
@@ -75,16 +88,13 @@ if __name__ =='__main__':
 
     # Build the command we're going to run
     cmd = [cmo.util.programs['perl']['default'], script_path]
-    stderr = args.stderr
-    stdout = args.stdout
-    # Trim out arguments without values, and also any args that might mess with our logging
+    # Trim out arguments without values
     args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
-    cmo.util.remove_logging_options_from_dict(args_dict)
 
     # Make sure the arguments are in a format that the script will accept, and kick it off
     for arg, value in args_dict.items():
         arg = arg.replace("_","-")
         cmd = cmd + ["--"+arg, value]
-    sys.stderr.write( "Running: " + " ".join( cmd ) + "\n" )
-    cmo.util.call_cmd( " ".join( cmd ), stdout=stdout, stderr=stderr )
+    sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
+    cmo.util.call_cmd( " ".join( cmd ))
     shutil.rmtree(tmp_dir)
diff --git a/bin/cmo_maf2vcf b/bin/cmo_maf2vcf
@@ -1,37 +1,69 @@
 #!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
 
 import argparse, os, sys, re, subprocess, itertools, glob
+from operator import attrgetter
+import textwrap as _textwrap
 import cmo
 
+# Custom help formatter to display args in alphabetical order, and fitted line wrap for sphinx
+class SortingHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
+    # Sort the actions by their option strings before handing them to the base formatter
+    def add_arguments(self, actions):
+        actions = sorted(actions, key=attrgetter('option_strings'))
+        super(SortingHelpFormatter, self).add_arguments(actions)
+    # Replace every match of the formatter's whitespace matcher with one space, strip the ends,
+    # and wrap at a fixed 78 columns; the width argument passed in is ignored
+    def _split_lines(self, text, width):
+        text = self._whitespace_matcher.sub(' ', text).strip()
+        return _textwrap.wrap(text, 78)
+
+# Function that runs --help on the tool we've wrapped, and extracts documentation
 def parse_script_help(script_path):
+    # Returns a pair of dicts keyed by option name (e.g. "--foo"): the first maps each option
+    # to its help description, the second maps options to the default shown in trailing [brackets]
     perl = cmo.util.programs['perl']['default']
-    help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]),stdout=subprocess.PIPE,shell=True).communicate()[0]
-    valid_args = re.findall(r"\s+(--[\S_]+)\s+([\S \t]+)\n?", help_text, re.M)
-    return dict(valid_args)
+    # Run "<perl> <script_path> -h" through the shell and keep only what it prints to stdout
+    help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]), stdout=subprocess.PIPE, shell=True).communicate()[0]
+    # For each line that begins with an "--option", capture the option and the text that follows
+    # it, stopping at the first "[" or at the end of the line
+    valid_args = re.findall(r"^\s*(--\S+)\s+([^\[\n]+)", help_text, re.M)
+    # For option lines that end in "[...]", capture the option and the text inside the brackets
+    defaults = re.findall(r"^\s*(--\S+)\s+[\S ]+\[([\S ]+)\]$", help_text, re.M)
+    return dict(valid_args), dict(defaults)
 
 if __name__ =='__main__':
+    # Entry point: build a command line from the wrapped Perl script's own help text, parse the
+    # user's options, add the reference FASTA for the chosen genome build, and run the script
     # We'll first need to figure out which version to run with "-h" to parse the help text
-    preparser = argparse.ArgumentParser(description="run maf2vcf", add_help=False)
-    preparser.add_argument("--version", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
+    preparser = argparse.ArgumentParser(description="Run maf2vcf", add_help=False, formatter_class=SortingHelpFormatter)
+    preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
+    preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
     options, _ = preparser.parse_known_args()
+
+    # Figure out the path to the actual Perl script that this Python wrapper will run
     script_path = cmo.util.programs['vcf2maf'][options.version] + "maf2vcf.pl"
-    args_dict = parse_script_help(script_path)
-    parser = argparse.ArgumentParser(parents = [preparser], add_help=True)
+    # Extract arguments and their defaults, by parsing the --help output
+    args_dict, defaults_dict = parse_script_help(script_path)
+
+    # With arguments and defaults set, let's construct an argparse instance
+    parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
     for arg, description in args_dict.items():
-        if arg == "--help":
+        # Hide a few arguments from the user, because we'll determine them ourselves
+        if arg in ["--help","--man","--ref-fasta"]:
             continue
-        parser.add_argument(arg,action="store", metavar='', help=description)
-    cmo.util.add_logging_options(parser)
+        if arg in defaults_dict and arg not in ["--output-maf"]:
+            parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
+        else:
+            parser.add_argument(arg, action="store", metavar='', help=description)
+
+    # Now run the argparse instance, which will parse and execute, or print help text if requested
     args = parser.parse_args()
     args_dict = vars(args)
+
+    # Locate the reference for this genome build
+    args_dict['ref_fasta'] = cmo.util.genomes[args.ncbi_build]['fasta']
+
+    # Remove arguments that the actual wrapped tool won't recognize
-    for key in ["version"]:
+    # --ncbi-build is defined by this wrapper only, to pick the FASTA above. The tool's help text
+    # cannot list it (argparse would have raised a conflicting-option error when it was added a
+    # second time), so it is dropped here instead of being forwarded as an unknown option.
+    for key in ["version", "ncbi_build"]:
         del args_dict[key]
+
+    # Build the command we're going to run
     cmd = [cmo.util.programs['perl']['default'], script_path]
-    stderr = args.stderr
-    stdout = args.stdout
+    # Trim out arguments without values
     args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
-    cmo.util.remove_logging_options_from_dict(args_dict)
+
+    # Make sure the arguments are in a format that the script will accept, and kick it off
     for arg, value in args_dict.items():
         arg = arg.replace("_","-")
         cmd = cmd + ["--"+arg, value]
-    cmo.util.call_cmd(" ".join(cmd), stdout=stdout, stderr=stderr)
+    sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
+    cmo.util.call_cmd( " ".join( cmd ))
diff --git a/bin/cmo_vcf2maf b/bin/cmo_vcf2maf
@@ -1,6 +1,6 @@
 #!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
 
-import argparse, os, sys, re, subprocess, itertools, glob, tempfile, shutil
+import argparse, os, sys, re, subprocess, itertools, glob, getpass, tempfile, shutil
 from operator import attrgetter
 import textwrap as _textwrap
 import cmo
@@ -42,8 +42,8 @@ if __name__ =='__main__':
     defaults_dict['--maf-center'] = 'mskcc.org'
     defaults_dict['--vcf-tumor-id'] = defaults_dict['--tumor-id']
     defaults_dict['--vcf-normal-id'] = defaults_dict['--normal-id']
-    tmp_dir = tempfile.mkdtemp(dir='/scratch') if os.path.exists('/scratch') else tempfile.mkdtemp(dir='/tmp');
-    defaults_dict['--tmp-dir'] = tmp_dir
+    tmp_root = "/scratch/<username>/..."
+    defaults_dict['--tmp-dir'] = tmp_root
 
     # With arguments and defaults set, let's construct an argparse instance
     parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
@@ -55,12 +55,25 @@ if __name__ =='__main__':
             parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
         else:
             parser.add_argument(arg, action="store", metavar='', help=description)
-    cmo.util.add_logging_options(parser)
 
     # Now run the argparse instance, which will parse and execute, or print help text if requested
     args = parser.parse_args()
     args_dict = vars(args)
 
+    # If user didn't define their own --tmp-dir, let's create one for them under /scratch/username
+    if args_dict['tmp_dir'] == tmp_root:
+        # Create a subdirectory under /scratch with the username, if it doesn't already exist
+        tmp_root = "/scratch/" + getpass.getuser()
+        if not os.path.exists(tmp_root):
+            os.makedirs(tmp_root)
+        # For machines without writable /scratch, default to creating a temp folder under /tmp
+        tmp_dir = tempfile.mkdtemp(dir=tmp_root) if os.path.exists(tmp_root) else tempfile.mkdtemp(dir='/tmp')
+        args_dict['tmp_dir'] = tmp_dir
+        # Show the user a warning about limited storage in temp directories
+        sys.stderr.write( "WARNING: Writing temporary files to " + tmp_dir + " which could fill " + 
+            "up and interrupt your colleagues' work. If you're working with giant files, then " +
+            "please define your own --tmp-dir, or we're gonna get ya!\n" )
+
     # Locate VEP and its cache, the reference FASTA, and the VCF used for filtering
     vep_dir = cmo.util.programs['vep'][args.vep_release]
     args_dict['vep_data'] = vep_dir
@@ -77,16 +90,13 @@ if __name__ =='__main__':
 
     # Build the command we're going to run
     cmd = [cmo.util.programs['perl']['default'], script_path]
-    stderr = args.stderr
-    stdout = args.stdout
-    # Trim out arguments without values, and also any args that might mess with our logging
+    # Trim out arguments without values
     args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
-    cmo.util.remove_logging_options_from_dict(args_dict)
 
     # Make sure the arguments are in a format that the script will accept, and kick it off
     for arg, value in args_dict.items():
         arg = arg.replace("_","-")
         cmd = cmd + ["--"+arg, value]
-    sys.stderr.write( "Running: " + " ".join( cmd ) + "\n" )
-    cmo.util.call_cmd( " ".join( cmd ), stdout=stdout, stderr=stderr )
+    sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
+    cmo.util.call_cmd( " ".join( cmd ))
     shutil.rmtree(tmp_dir)
diff --git a/bin/cmo_vcf2vcf b/bin/cmo_vcf2vcf
@@ -0,0 +1,69 @@
+#!/opt/common/CentOS_6-dev/python/python-2.7.10/bin/python
+
+import argparse, os, sys, re, subprocess, itertools, glob
+from operator import attrgetter
+import textwrap as _textwrap
+import cmo
+
+# Custom help formatter to display args in alphabetical order, and fitted line wrap for sphinx
+class SortingHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
+    # Sort the actions by their option strings before handing them to the base formatter
+    def add_arguments(self, actions):
+        actions = sorted(actions, key=attrgetter('option_strings'))
+        super(SortingHelpFormatter, self).add_arguments(actions)
+    # Replace every match of the formatter's whitespace matcher with one space, strip the ends,
+    # and wrap at a fixed 78 columns; the width argument passed in is ignored
+    def _split_lines(self, text, width):
+        text = self._whitespace_matcher.sub(' ', text).strip()
+        return _textwrap.wrap(text, 78)
+
+# Function that runs --help on the tool we've wrapped, and extracts documentation
+def parse_script_help(script_path):
+    # Returns a pair of dicts keyed by option name (e.g. "--foo"): the first maps each option
+    # to its help description, the second maps options to the default shown in trailing [brackets]
+    perl = cmo.util.programs['perl']['default']
+    # Run "<perl> <script_path> -h" through the shell and keep only what it prints to stdout
+    help_text = subprocess.Popen(" ".join([perl, script_path, "-h"]), stdout=subprocess.PIPE, shell=True).communicate()[0]
+    # For each line that begins with an "--option", capture the option and the text that follows
+    # it, stopping at the first "[" or at the end of the line
+    valid_args = re.findall(r"^\s*(--\S+)\s+([^\[\n]+)", help_text, re.M)
+    # For option lines that end in "[...]", capture the option and the text inside the brackets
+    defaults = re.findall(r"^\s*(--\S+)\s+[\S ]+\[([\S ]+)\]$", help_text, re.M)
+    return dict(valid_args), dict(defaults)
+
+if __name__ =='__main__':
+    # Entry point: build a command line from the wrapped Perl script's own help text, parse the
+    # user's options, add the reference FASTA for the chosen genome build, and run the script
+    # We'll first need to figure out which version to run with "-h" to parse the help text
+    preparser = argparse.ArgumentParser(description="Run vcf2vcf", add_help=False, formatter_class=SortingHelpFormatter)
+    preparser.add_argument("--version", help="Version of tool to run", choices=cmo.util.programs['vcf2maf'].keys(), default="default")
+    preparser.add_argument("--ncbi-build", help="Genome build of variants in input", choices=["GRCh37","GRCh38","GRCm38"], default="GRCh37")
+    options, _ = preparser.parse_known_args()
+
+    # Figure out the path to the actual Perl script that this Python wrapper will run
+    script_path = cmo.util.programs['vcf2maf'][options.version] + "vcf2vcf.pl"
+    # Extract arguments and their defaults, by parsing the --help output
+    args_dict, defaults_dict = parse_script_help(script_path)
+
+    # With arguments and defaults set, let's construct an argparse instance
+    parser = argparse.ArgumentParser(parents = [preparser], add_help=True, formatter_class=SortingHelpFormatter)
+    for arg, description in args_dict.items():
+        # Hide a few arguments from the user, because we'll determine them ourselves
+        if arg in ["--help","--man","--ref-fasta"]:
+            continue
+        if arg in defaults_dict and arg not in ["--output-maf"]:
+            parser.add_argument(arg, action="store", metavar='', help=description, default=defaults_dict[arg])
+        else:
+            parser.add_argument(arg, action="store", metavar='', help=description)
+
+    # Now run the argparse instance, which will parse and execute, or print help text if requested
+    args = parser.parse_args()
+    args_dict = vars(args)
+
+    # Locate the reference for this genome build
+    args_dict['ref_fasta'] = cmo.util.genomes[args.ncbi_build]['fasta']
+
+    # Remove arguments that the actual wrapped tool won't recognize
+    # --ncbi-build is defined by this wrapper only, to pick the FASTA above. The tool's help text
+    # cannot list it (argparse would have raised a conflicting-option error when it was added a
+    # second time), so it is dropped here instead of being forwarded as an unknown option.
+    for key in ["version", "ncbi_build"]:
+        del args_dict[key]
+
+    # Build the command we're going to run
+    cmd = [cmo.util.programs['perl']['default'], script_path]
+    # Trim out arguments without values
+    args_dict = dict((k, v) for k, v in args_dict.iteritems() if v)
+
+    # Make sure the arguments are in a format that the script will accept, and kick it off
+    for arg, value in args_dict.items():
+        arg = arg.replace("_","-")
+        cmd = cmd + ["--"+arg, value]
+    sys.stderr.write( "RUNNING: " + " ".join( cmd ) + "\n" )
+    cmo.util.call_cmd( " ".join( cmd ))