Skip to content

Commit

Permalink
Merge pull request #20 from Novartis/develop
Browse files Browse the repository at this point in the history
Bring in recent changes and bug fixes before a new release
  • Loading branch information
mdshw5 authored Sep 1, 2021
2 parents ba9a89c + c80356d commit 323920d
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 78 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Matthew D Shirley, Viveksagar K Radhakrishna, Javad Golji, Joshua M Korn. (Prepr
Installation:
```
$ pip install --user novartis-pisces
$ pisces dependencies
$ pisces_dependencies
```

Submitting jobs to an HPC cluster:
Expand Down
11 changes: 8 additions & 3 deletions pisces/R/make_expression_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,13 @@ Summarize <- function(txi, tx2gene, annotation, args, metadata, species) {
"genes."))
ribo.genes <- which(grepl("^RP[SL]", scaled_df[, "symbol"], ignore.case = T))

first_quartile <- function(x) { y <- quantile(x, c(0.25, 0.5, 0.75), type=1)
return(y[[1]])


first_quartile <- function(x) {
# > first_quartile(c(0,0,0,0,0,0,0,1,10,10))
# [1] 1
y <- quantile(x, c(0.25, 0.5, 0.75), type=1)
return(y[[3]])
}

message(paste("Excluding", length(ribo.genes), "ribosomal genes from TMM scaling factor calulation."))
Expand Down Expand Up @@ -389,7 +394,7 @@ deseq_analysis <- function(contrasts, txi.gene, contrast.metadata, formula, fail
} else {
deseq.dataset <- DESeqDataSetFromTximport(txi.gene, contrast.metadata, as.formula(paste0("~", contrasts$Factor[1])))}
message(paste("Filtering", length(failing.quartile.filter), "genes failing --quartile-expression cutoff from DESeq2 dataset."))
deseq.dataset <- deseq.dataset[failing.quartile.filter,]
deseq.dataset <- deseq.dataset[-failing.quartile.filter,]
message("Running DESeq2...")

deseq.dataset <- DESeq(deseq.dataset)
Expand Down
45 changes: 41 additions & 4 deletions pisces/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
import string
import random
import shutil
import platform
import tarfile
from pprint import pformat
from subprocess import Popen, PIPE
from subprocess import Popen, PIPE, call
from multiprocessing import Process
from tempfile import NamedTemporaryFile, mkdtemp
from functools import partial
Expand All @@ -22,11 +24,46 @@

unique_id = ''.join(random.choice(string.digits) for _ in range(10))


def find_data_directory():
""" Returns the path to the module directory """
return os.path.dirname(os.path.abspath(__file__))

def install_salmon():
"""Install a version of salmon from bioconda"""
import glob
import requests
from io import BytesIO
from urllib.request import urlopen
from shutil import rmtree
from subprocess import call

redist = os.path.join(find_data_directory(), 'redist')
rmtree(os.path.join(redist, "salmon"), ignore_errors=True)

if platform.system() == "Linux":
salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/linux-64/salmon-1.3.0-hf69c8f4_0.tar.bz2"
elif platform.system() == "Darwin":
salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/osx-64/salmon-1.3.0-hb70dc8d_0.tar.bz2"

print("Installing salmon")
with requests.get(salmon_url, allow_redirects=True) as response:
tar_file = BytesIO(response.content)
tar_file.seek(0)
with tarfile.open(fileobj=tar_file) as tar:
tar.extractall(path=os.path.join(redist, "salmon"))

def install_r_dependencies():
"""Install R dependencies"""
cmd = [
'Rscript',
os.path.join(find_data_directory(), 'R/set_up_dependencies.R')
]
call(cmd)

def install_dependencies():
install_salmon()
install_r_dependencies()

def sra_valid_accession(accession):
""" Test whether a string is an SRA accession """
if accession.startswith('SRR') and len(accession) == 10:
Expand Down Expand Up @@ -234,15 +271,15 @@ def format_salmon_command(libtype, threads, index, output_dir, read1, read2,
'bin', 'salmon'), '--no-version-check', 'quant', '-q',
'--index', index, '--libType', libtype, '--mates1', ' '.join(read1),
'--mates2', ' '.join(read2), '--output', output_dir, '--threads',
str(threads), '--seqBias', '--useVBOpt'
str(threads), '--seqBias', '--gcBias', '--validateMappings', '--dumpEq'
]
elif not read2:
cmd = [
os.path.join(data_dir, 'redist', 'salmon',
'bin', 'salmon'), '--no-version-check', 'quant', '-q',
'--index', index, '--libType', libtype, '-r', ' '.join(read1), '--output',
output_dir, '--threads',
str(threads), '--seqBias', '--useVBOpt'
str(threads), '--seqBias', '--gcBias', '--validateMappings', '--dumpEq'
]
if make_bam:
import shlex
Expand Down
17 changes: 0 additions & 17 deletions pisces/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,6 @@ def call_Rscript(args, unknown_args):
]
cmd.extend(unknown_args)
call(cmd)

def call_Rscript_dependencies(args, unknown_args):
""" Call an R script, passing through arguments from argparse. """
cmd = [
'Rscript',
os.path.join(data_dir, 'R/set_up_dependencies.R')
]
cmd.extend(unknown_args)
call(cmd)


def default_species_index(conf):
""" Return a list of the default index builds for each species defined in args.conf """
Expand Down Expand Up @@ -249,13 +239,6 @@ def create_parser(args=None):
"Compile an expression matrix from multiple individual PISCES runs",
add_help=False)
matrix.set_defaults(func=call_Rscript)

dependencies = subparsers.add_parser(
'dependencies',
help=
"Install R dependencies for PISCES",
add_help=False)
dependencies.set_defaults(func=call_Rscript_dependencies)

qc = subparsers.add_parser(
'summarize-qc',
Expand Down
6 changes: 6 additions & 0 deletions pisces/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,16 @@ def features_to_string(features, fasta_in, masked=True, strand=True):
"""
"""
sequences = []
feature_strand = "."
for feature in features:
feature_strand = feature.strand
sequences.append(
feature.sequence(
fasta_in, use_strand=strand))
# if the transcript is on the reverse strand, reverse order of exons
# before concatenating
if feature_strand == "-":
sequences = sequences[::-1]
seq = ''.join(sequences)
mask_count = sum(seq.count(a) for a in soft_chars)
if masked:
Expand Down
54 changes: 1 addition & 53 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,11 @@
import os
import sys
import tarfile
import zipfile
import subprocess
import logging
import platform
from setuptools import setup
from setuptools.command.develop import develop
from setuptools.command.install import install

install_requires = [
'six', 'setuptools >= 0.7', 'simplesam >= 0.0.3', 'tqdm', 'fastqp >= 0.2',
'pandas', 'strandex', 'gffutils', 'pyfaidx', 'drmaa', 'twobitreader', 'requests'
]


class PostDevelopCommand(develop):
def run(self):
develop.run(self)
install_binary_dependencies()


class PostInstallCommand(install):
def run(self):
install.run(self)
install_binary_dependencies()


def install_binary_dependencies():
sys.path.pop(0) # ignore local search path
import pisces
import glob
import requests
from io import BytesIO
from urllib.request import urlopen
from shutil import rmtree
from subprocess import call

redist = os.path.join(os.path.dirname(pisces.__file__), 'redist')
rmtree(os.path.join(redist, "salmon"), ignore_errors=True)
local_redist = os.path.join(os.path.dirname(__file__), 'pisces/redist')

if platform.system() == "Linux":
salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/linux-64/salmon-1.3.0-hf69c8f4_0.tar.bz2"
elif platform.system() == "Darwin":
salmon_url = "https://anaconda.org/bioconda/salmon/1.3.0/download/osx-64/salmon-1.3.0-hb70dc8d_0.tar.bz2"

print("Installing salmon")
with requests.get(salmon_url, allow_redirects=True) as response:
tar_file = BytesIO(response.content)
tar_file.seek(0)
with tarfile.open(fileobj=tar_file) as tar:
tar.extractall(path=os.path.join(redist, "salmon"))

setup(
cmdclass={
'develop': PostDevelopCommand,
'install': PostInstallCommand,
},
name='novartis-pisces',
author='Matthew Shirley',
author_email='[email protected]',
Expand All @@ -70,7 +18,7 @@ def install_binary_dependencies():
install_requires=install_requires,
use_scm_version={"local_scheme": "no-local-version"},
setup_requires=['setuptools_scm'],
entry_points={'console_scripts': ['pisces = pisces.cli:main']},
entry_points={'console_scripts': ['pisces = pisces.cli:main', 'pisces_dependencies = pisces:install_dependencies']},
classifiers=[
"Development Status :: 4 - Beta",
"License :: OSI Approved :: Apache Software License",
Expand Down

0 comments on commit 323920d

Please sign in to comment.