diff --git a/README.md b/README.md index 5e2b8f9..6f64e15 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Matthew D Shirley, Viveksagar K Radhakrishna, Javad Golji, Joshua M Korn. (Prepr Installation: ``` $ pip install --user novartis-pisces -$ pisces dependencies +$ pisces_dependencies ``` Submitting jobs to an HPC cluster: diff --git a/pisces/R/make_expression_matrix.R b/pisces/R/make_expression_matrix.R index 369b302..4f74ca4 100755 --- a/pisces/R/make_expression_matrix.R +++ b/pisces/R/make_expression_matrix.R @@ -269,8 +269,13 @@ Summarize <- function(txi, tx2gene, annotation, args, metadata, species) { "genes.")) ribo.genes <- which(grepl("^RP[SL]", scaled_df[, "symbol"], ignore.case = T)) - first_quartile <- function(x) { y <- quantile(x, c(0.25, 0.5, 0.75), type=1) - return(y[[1]]) + + + first_quartile <- function(x) { + # > first_quartile(c(0,0,0,0,0,0,0,1,10,10)) + # [1] 1 + y <- quantile(x, c(0.25, 0.5, 0.75), type=1) + return(y[[3]]) } message(paste("Excluding", length(ribo.genes), "ribosomal genes from TMM scaling factor calulation.")) @@ -389,7 +394,7 @@ deseq_analysis <- function(contrasts, txi.gene, contrast.metadata, formula, fail } else { deseq.dataset <- DESeqDataSetFromTximport(txi.gene, contrast.metadata, as.formula(paste0("~", contrasts$Factor[1])))} message(paste("Filtering", length(failing.quartile.filter), "genes failing --quartile-expression cutoff from DESeq2 dataset.")) - deseq.dataset <- deseq.dataset[failing.quartile.filter,] + deseq.dataset <- deseq.dataset[-failing.quartile.filter,] message("Running DESeq2...") deseq.dataset <- DESeq(deseq.dataset) diff --git a/pisces/__init__.py b/pisces/__init__.py index 6d7c300..b41e8d2 100755 --- a/pisces/__init__.py +++ b/pisces/__init__.py @@ -11,8 +11,10 @@ import string import random import shutil +import platform +import tarfile from pprint import pformat -from subprocess import Popen, PIPE +from subprocess import Popen, PIPE, call from multiprocessing import Process from tempfile import NamedTemporaryFile, mkdtemp from functools import partial @@ -22,11 +24,46 @@ unique_id = ''.join(random.choice(string.digits) for _ in range(10)) - def find_data_directory(): """ Returns the path to the module directory """ return os.path.dirname(os.path.abspath(__file__)) +def install_salmon(): + """Install a version of salmon from bioconda""" + import glob + import requests + from io import BytesIO + from urllib.request import urlopen + from shutil import rmtree + from subprocess import call + + redist = os.path.join(find_data_directory(), 'redist') + rmtree(os.path.join(redist, "salmon"), ignore_errors=True) + + if platform.system() == "Linux": + salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/linux-64/salmon-1.3.0-hf69c8f4_0.tar.bz2" + elif platform.system() == "Darwin": + salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/osx-64/salmon-1.3.0-hb70dc8d_0.tar.bz2" + + print("Installing salmon") + with requests.get(salmon_url, allow_redirects=True) as response: + tar_file = BytesIO(response.content) + tar_file.seek(0) + with tarfile.open(fileobj=tar_file) as tar: + tar.extractall(path=os.path.join(redist, "salmon")) + +def install_r_dependencies(): + """Install R dependencies""" + cmd = [ + 'Rscript', + os.path.join(find_data_directory(), 'R/set_up_dependencies.R') + ] + call(cmd) + +def install_dependencies(): + install_salmon() + install_r_dependencies() + def sra_valid_accession(accession): """ Test whether a string is an SRA accession """ if accession.startswith('SRR') and len(accession) == 10: @@ -234,7 +271,7 @@ def format_salmon_command(libtype, threads, index, output_dir, read1, read2, 'bin', 'salmon'), '--no-version-check', 'quant', '-q', '--index', index, '--libType', libtype, '--mates1', ' '.join(read1), '--mates2', ' '.join(read2), '--output', output_dir, '--threads', - str(threads), '--seqBias', '--useVBOpt' + str(threads), '--seqBias', '--gcBias', '--validateMappings', '--dumpEq' ] elif not read2: cmd = [ @@ -242,7 +279,7 @@ def format_salmon_command(libtype, threads, index, output_dir, read1, read2, 'bin', 'salmon'), '--no-version-check', 'quant', '-q', '--index', index, '--libType', libtype, '-r', ' '.join(read1), '--output', output_dir, '--threads', - str(threads), '--seqBias', '--useVBOpt' + str(threads), '--seqBias', '--gcBias', '--validateMappings', '--dumpEq' ] if make_bam: import shlex diff --git a/pisces/cli.py b/pisces/cli.py index 3dabd81..518bdb0 100755 --- a/pisces/cli.py +++ b/pisces/cli.py @@ -59,16 +59,6 @@ def call_Rscript(args, unknown_args): ] cmd.extend(unknown_args) call(cmd) - -def call_Rscript_dependencies(args, unknown_args): - """ Call an R script, passing through arguments from argparse. """ - cmd = [ - 'Rscript', - os.path.join(data_dir, 'R/set_up_dependencies.R') - ] - cmd.extend(unknown_args) - call(cmd) - def default_species_index(conf): """ Return a list of the default index builds for each species defined in args.conf """ @@ -249,13 +239,6 @@ def create_parser(args=None): "Compile an expression matrix from multiple individual PISCES runs", add_help=False) matrix.set_defaults(func=call_Rscript) - - dependencies = subparsers.add_parser( - 'dependencies', - help= - "Install R dependencies for PISCES", - add_help=False) - dependencies.set_defaults(func=call_Rscript_dependencies) qc = subparsers.add_parser( 'summarize-qc', diff --git a/pisces/index.py b/pisces/index.py index 62184b6..5d2dc0f 100755 --- a/pisces/index.py +++ b/pisces/index.py @@ -266,10 +266,16 @@ def features_to_string(features, fasta_in, masked=True, strand=True): """ """ sequences = [] + feature_strand = "." for feature in features: + feature_strand = feature.strand sequences.append( feature.sequence( fasta_in, use_strand=strand)) + # if the transcript is on the reverse strand, reverse order of exons + # before concatenating + if feature_strand == "-": + sequences = sequences[::-1] seq = ''.join(sequences) mask_count = sum(seq.count(a) for a in soft_chars) if masked: diff --git a/setup.py b/setup.py index 79a07dc..32e0329 100644 --- a/setup.py +++ b/setup.py @@ -1,63 +1,11 @@ -import os -import sys -import tarfile -import zipfile -import subprocess -import logging -import platform from setuptools import setup -from setuptools.command.develop import develop -from setuptools.command.install import install install_requires = [ 'six', 'setuptools >= 0.7', 'simplesam >= 0.0.3', 'tqdm', 'fastqp >= 0.2', 'pandas', 'strandex', 'gffutils', 'pyfaidx', 'drmaa', 'twobitreader', 'requests' ] - -class PostDevelopCommand(develop): - def run(self): - develop.run(self) - install_binary_dependencies() - - -class PostInstallCommand(install): - def run(self): - install.run(self) - install_binary_dependencies() - - -def install_binary_dependencies(): - sys.path.pop(0) # ignore local search path - import pisces - import glob - import requests - from io import BytesIO - from urllib.request import urlopen - from shutil import rmtree - from subprocess import call - - redist = os.path.join(os.path.dirname(pisces.__file__), 'redist') - rmtree(os.path.join(redist, "salmon"), ignore_errors=True) - local_redist = os.path.join(os.path.dirname(__file__), 'pisces/redist') - - if platform.system() == "Linux": - salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/linux-64/salmon-1.3.0-hf69c8f4_0.tar.bz2" - elif platform.system() == "Darwin": - salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/osx-64/salmon-1.3.0-hb70dc8d_0.tar.bz2" - - print("Installing salmon") - with requests.get(salmon_url, allow_redirects=True) as response: - tar_file = BytesIO(response.content) - tar_file.seek(0) - with tarfile.open(fileobj=tar_file) as tar: - tar.extractall(path=os.path.join(redist, "salmon")) - setup( - cmdclass={ - 'develop': PostDevelopCommand, - 'install': PostInstallCommand, - }, name='novartis-pisces', author='Matthew Shirley', author_email='matt_d.shirley@novartis.com', @@ -70,7 +18,7 @@ def install_binary_dependencies(): install_requires=install_requires, use_scm_version={"local_scheme": "no-local-version"}, setup_requires=['setuptools_scm'], - entry_points={'console_scripts': ['pisces = pisces.cli:main']}, + entry_points={'console_scripts': ['pisces = pisces.cli:main', 'pisces_dependencies = pisces:install_dependencies']}, classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: Apache Software License",