From 4f35e9b537cd40096b6dc2ba47ebe92acaf34fd0 Mon Sep 17 00:00:00 2001 From: Yueqi Wang Date: Mon, 18 Jun 2018 05:51:38 -0600 Subject: [PATCH 01/15] add png.arguments to FeaturePlot --- R/plotting.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 06599af41..6246e711a 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1036,7 +1036,9 @@ FeaturePlot <- function( coord.fixed = FALSE, dark.theme = FALSE, do.return = FALSE, - vector.friendly=FALSE + vector.friendly=FALSE, + png.file = NULL, + png.arguments = c(10,10, 100) ) { cells.use <- SetIfNull(x = cells.use, default = colnames(x = object@data)) if (is.null(x = nCol)) { @@ -1149,7 +1151,9 @@ FeaturePlot <- function( no.axes = no.axes, no.legend = no.legend, dark.theme = dark.theme, - vector.friendly = vector.friendly + vector.friendly = vector.friendly, + png.file = png.file, + png.arguments = png.arguments ), SIMPLIFY = FALSE # Get list, not matrix ) @@ -1550,7 +1554,7 @@ globalVariables(names = 'Value', package = 'Seurat', add = TRUE) #' @param plot.y.lim Y-axis maximum on each QQ plot. #' #' @return Returns a Seurat object where object@@dr$pca@@jackstraw@@overall.p.values -#' represents p-values for each PC and object@@dr$pca@@misc$jackstraw.plot +#' represents p-values for each PC and object@@dr$pca@@misc$jackstraw.plot #' stores the ggplot2 plot. #' #' @author Thanks to Omri Wurtzel for integrating with ggplot @@ -1627,10 +1631,10 @@ JackStrawPlot <- function( coord_flip() + geom_abline(intercept = 0, slope = 1, linetype = "dashed", na.rm = TRUE) + theme_bw() - + object@dr$pca@misc[["jackstraw.plot"]] <- gp print(gp) - + return(object) } From 93b856f131fb735f552c8bee1534bab22cedf563 Mon Sep 17 00:00:00 2001 From: Nigel Delaney Date: Thu, 21 Jun 2018 17:16:08 -0700 Subject: [PATCH 02/15] Remove All PKG_LIBS Flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A previous pull request (https://github.com/satijalab/seurat/pull/466) removed the unused `$(FLIBS)` flag from the Makevars file, which was a mistake because there are circumstances where linking to `$(LAPACK_LIBS) $(BLAS_LIBS)` depend on fortran and so can create compile errors. This new commit removes all of these unused flags, rather than just one. Thus avoiding unneccesary linkage, fortran errors AND `R CMD CHECK` errors. For background, in older versions of Rcpp, these compile flags were required. However, Rcpp has since been updated to avoid this issue. Their FAQ (http://dirk.eddelbuettel.com/code/rcpp/Rcpp-FAQ.pdf) describes the issue 2.15. **What about the new ‘no-linking’ feature.** Starting with Rcpp 0.11.0, functionality provided by Rcpp and used by packages built with Rcpp accessed via the registration facility offered by R (and which is used by lme4 and Matrix, as well as by xts and zoo). This requires no effort from the user / programmer, and even frees us from explicit linking instruction. In most cases, the files src/Makevars and src/Makevars.win can now be removed. Exceptions are the use of RcppArmadillo (which needs an entry PKG_LIBS=$(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)) and packages linking to external libraries they use. But for most packages using Rcpp, only two things are required: • an entry in DESCRIPTION such as Imports: Rcpp (which may be versioned as in Imports: Rcpp (>= 0.11.0)), and • an entry in NAMESPACE to ensure Rcpp is correctly instantiated, for example importFrom(Rcpp, evalCpp). The name of the symbol does not really matter. Rcpp version 0.11.0 was released years before the current version of R required by Seurat (3.4.0), so although this requirement is implicit, this commit also specifies the version requirement. Sorry for the mistake last time! --- DESCRIPTION | 4 ++-- src/Makevars | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c14a2d913..88a21f6fe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,7 @@ Imports: reshape2, gplots, gdata, - Rcpp, + Rcpp (>= 0.11.0), tclust, ranger, dtw, @@ -64,7 +64,7 @@ Imports: reticulate, foreach, hdf5r -LinkingTo: Rcpp, RcppEigen, RcppProgress +LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: GPL-3 | file LICENSE LazyData: true Collate: diff --git a/src/Makevars b/src/Makevars index 6eb107b44..a7f35101d 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,2 +1 @@ -PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) CXX_STD = CXX11 From 9c7205c35d986a1ff108d0f9094f8f4f1f7440ec Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 12:05:17 -0400 Subject: [PATCH 03/15] Remove dependencies --- DESCRIPTION | 1 - NAMESPACE | 1 - R/preprocessing.R | 2 -- 3 files changed, 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 88a21f6fe..738240377 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,6 @@ SystemRequirements: Java (>= 6) Imports: methods, ROCR, - stringr, mixtools, lars, ica, diff --git a/NAMESPACE b/NAMESPACE index b7b25a396..8351315c9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -286,7 +286,6 @@ import(lars) import(metap) import(parallel) import(pbapply) -import(stringr) importFrom(FNN,get.knn) importFrom(Hmisc,cut2) importFrom(MASS,glm.nb) diff --git a/R/preprocessing.R b/R/preprocessing.R index 7ccb089f6..62ec8e17e 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -31,8 +31,6 @@ #' @return Returns a Seurat object with the raw data stored in object@@raw.data. #' object@@data, object@@meta.data, object@@ident, also initialized. #' -#' @import stringr -#' @import pbapply #' @importFrom methods new #' @importFrom utils packageVersion #' @importFrom Matrix colSums rowSums From 42615fc3a6995d6833cf3a15d3c475116e780ef6 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 12:06:02 -0400 Subject: [PATCH 04/15] devtools::document updated these --- man/FeaturePlot.Rd | 3 ++- man/JackStrawPlot.Rd | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 2a5ba68ee..c5ad366a6 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,7 +10,8 @@ FeaturePlot(object, features.plot, min.cutoff = NA, max.cutoff = NA, do.hover = FALSE, data.hover = "ident", do.identify = FALSE, reduction.use = "tsne", use.imputed = FALSE, nCol = NULL, no.axes = FALSE, no.legend = TRUE, coord.fixed = FALSE, - dark.theme = FALSE, do.return = FALSE, vector.friendly = FALSE) + dark.theme = FALSE, do.return = FALSE, vector.friendly = FALSE, + png.file = NULL, png.arguments = c(10, 10, 100)) } \arguments{ \item{object}{Seurat object} diff --git a/man/JackStrawPlot.Rd b/man/JackStrawPlot.Rd index d857f97b7..556139353 100644 --- a/man/JackStrawPlot.Rd +++ b/man/JackStrawPlot.Rd @@ -23,7 +23,7 @@ significance (see Details)} } \value{ Returns a Seurat object where object@dr$pca@jackstraw@overall.p.values -represents p-values for each PC and object@dr$pca@misc$jackstraw.plot +represents p-values for each PC and object@dr$pca@misc$jackstraw.plot stores the ggplot2 plot. } \description{ From 6a24c4b762e19662840d024f823820f39b265a07 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 12:06:35 -0400 Subject: [PATCH 05/15] Use development version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 738240377..9554e3dc2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 2.3.2 +Version: 2.3.3.9000 Date: 2018-06-11 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , and Butler A and Satija R (2017) for more details. From 8cd57364974088890e457be9c43afbbfec9b1f97 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 12:52:56 -0400 Subject: [PATCH 06/15] Remove gdata dependency --- DESCRIPTION | 2 +- NAMESPACE | 2 -- R/interaction.R | 6 ++---- R/zfRenderSeurat.R | 12 ++++++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9554e3dc2..64c4ffddb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,7 +43,6 @@ Imports: irlba, reshape2, gplots, - gdata, Rcpp (>= 0.11.0), tclust, ranger, @@ -102,6 +101,7 @@ Collate: 'zfRenderSeurat.R' RoxygenNote: 6.0.1 Suggests: + gdata, testthat, loomR, phateR, diff --git a/NAMESPACE b/NAMESPACE index 8351315c9..f207564fe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -331,8 +331,6 @@ importFrom(dtw,dtw) importFrom(fitdistrplus,fitdist) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) -importFrom(gdata,drop.levels) -importFrom(gdata,interleave) importFrom(ggplot2,annotation_raster) importFrom(ggridges,geom_density_ridges) importFrom(ggridges,theme_ridges) diff --git a/R/interaction.R b/R/interaction.R index ad7a72255..42c9ba178 100644 --- a/R/interaction.R +++ b/R/interaction.R @@ -921,8 +921,6 @@ StashIdent <- function(object, save.name = "oldIdent") { #' #' @return A Seurat object where object@@ident has been appropriately modified #' -#' @importFrom gdata drop.levels -#' #' @export #' #' @examples @@ -954,7 +952,7 @@ SetIdent <- function(object, cells.use = NULL, ident.use = NULL) { ) ) object@ident[cells.use] <- ident.use - object@ident <- drop.levels(x = object@ident) + object@ident <- droplevels(x = object@ident) return(object) } @@ -1194,7 +1192,7 @@ RenameCells <- function(object, add.cell.id = NULL, new.names = NULL, colnames(object@raw.data) <- new.rawdata.names rownames(object@meta.data) <- new.cell.names object@cell.names <- new.cell.names - + if (for.merge) { return(object) } diff --git a/R/zfRenderSeurat.R b/R/zfRenderSeurat.R index ab651a902..138a28353 100644 --- a/R/zfRenderSeurat.R +++ b/R/zfRenderSeurat.R @@ -538,9 +538,13 @@ zf.insitu.ventral <- function(seuratObject, gene, label=TRUE, ...) { view3d(zoom = .75, userMatrix = rotMat, fov = 0) } +# @importFrom gdata interleave +#' @importFrom utils installed.packages #' @export -#' @importFrom gdata interleave zf.insitu.side <- function(expressionMatrix, nonmirror = TRUE, mirror = TRUE) { + if (!'gdata' %in% rownames(x = installed.packages())) { + stop("Please install gdata") + } # Determine geometry tierBins <- 30 # 1 bin per cell tier. DVBins <- 64 # 1 bin every 5.625 degrees; compatible with our current 8-bin system. @@ -641,7 +645,7 @@ zf.insitu.side <- function(expressionMatrix, nonmirror = TRUE, mirror = TRUE) { } } # Take the coordinates and reformat the lists to pass to RGL - quadX <- interleave( + quadX <- gdata::interleave( drawEmbryo$x1, drawEmbryo$x2, drawEmbryo$x3, @@ -649,7 +653,7 @@ zf.insitu.side <- function(expressionMatrix, nonmirror = TRUE, mirror = TRUE) { drop = TRUE ) dim(x = quadX) <- c(dim(x = quadX)[1] * dim(x = quadX)[2], 1) - quadY <- interleave( + quadY <- gdata::interleave( drawEmbryo$y1, drawEmbryo$y2, drawEmbryo$y3, @@ -657,7 +661,7 @@ zf.insitu.side <- function(expressionMatrix, nonmirror = TRUE, mirror = TRUE) { drop = TRUE ) dim(x = quadY) <- c(dim(x = quadY)[1] * dim(x = quadY)[2], 1) - quadZ <- interleave( + quadZ <- gdata::interleave( drawEmbryo$z1, drawEmbryo$z2, drawEmbryo$z3, From e4ca4452c1f901b5a41417737396c4ede2be1a47 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 13:58:12 -0400 Subject: [PATCH 07/15] Minor fix for gdata replacement --- R/interaction.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/interaction.R b/R/interaction.R index 42c9ba178..bb62745a0 100644 --- a/R/interaction.R +++ b/R/interaction.R @@ -416,7 +416,7 @@ SubsetData <- function( ) gc(verbose = FALSE) } - object@ident <- drop.levels(x = object@ident[cells.use]) + object@ident <- droplevels(x = object@ident[cells.use]) if (length(x = object@dr) > 0) { for (i in 1:length(object@dr)) { if (length(object@dr[[i]]@cell.embeddings) > 0) { From 63224ebac00ee02b69b0cec1a49854452249455b Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 15:48:50 -0400 Subject: [PATCH 08/15] Replace FNN with RANN --- DESCRIPTION | 1 - NAMESPACE | 1 - R/alignment.R | 53 +++++++++++++++++++++++---------- R/utilities.R | 43 ++++++++++++-------------- man/AddSmoothedScore.Rd | 6 +++- man/CalcAlignmentMetric.Rd | 5 +++- tests/testthat/test_alignment.R | 2 +- 7 files changed, 66 insertions(+), 45 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 64c4ffddb..50b6db21c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,6 @@ Imports: VGAM, pbapply, igraph, - FNN, RANN, caret, dplyr, diff --git a/NAMESPACE b/NAMESPACE index f207564fe..91d135689 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -286,7 +286,6 @@ import(lars) import(metap) import(parallel) import(pbapply) -importFrom(FNN,get.knn) importFrom(Hmisc,cut2) importFrom(MASS,glm.nb) importFrom(MASS,kde2d) diff --git a/R/alignment.R b/R/alignment.R index 6ceb51a89..fe0c64120 100644 --- a/R/alignment.R +++ b/R/alignment.R @@ -275,8 +275,10 @@ AlignSubspace <- function( #' @param dims.use Dimensions to use in building the NN graph #' @param grouping.var Grouping variable used in the alignment. #' @param nn Number of neighbors to calculate in the NN graph construction +#' @param nn.eps Error bound when performing nearest neighbor seach using RANN; +#' default of 0.0 implies exact nearest neighbor search #' -#' @importFrom FNN get.knn +#' @importFrom RANN nn2 #' @export #' #' @examples @@ -294,22 +296,41 @@ AlignSubspace <- function( #' dims.use = 1:5, grouping.var = "group") #' } #' -CalcAlignmentMetric <- function(object, reduction.use = "cca.aligned", dims.use, - grouping.var, nn){ - object <- SetAllIdent(object, grouping.var) - object <- SubsetData(object, max.cells.per.ident = min(table(object@ident))) - num.groups <- length(unique(object@ident)) - if(missing(nn)){ - nn <- ceiling(table(object@ident)[1] * 0.01 * num.groups) +CalcAlignmentMetric <- function( + object, + reduction.use = "cca.aligned", + dims.use, + grouping.var, + nn, + nn.eps = 0 +) { + object <- SetAllIdent(object = object, id = grouping.var) + object <- SubsetData(object = object, max.cells.per.ident = min(table(object@ident))) + num.groups <- length(x = unique(x = object@ident)) + if (missing(x = nn)) { + nn <- ceiling(x = table(object@ident)[1] * 0.01 * num.groups) } - dist.mat <- GetCellEmbeddings(object, reduction.type = reduction.use, dims.use = dims.use) - object.fnn <- get.knn(dist.mat, k = nn) - alignment.score <- sapply(1:length(object@cell.names), function(x) { - cell.id <- object@ident[x] - num.same.id <- length(which(object@ident[object.fnn$nn.index[x, ]] == cell.id)) - }) - alignment.score <- 1 - ((mean(alignment.score) - nn /num.groups) / (nn - nn/num.groups)) - return(unname(alignment.score)) + dist.mat <- GetCellEmbeddings( + object = object, + reduction.type = reduction.use, + dims.use = dims.use + ) + # object.fnn <- get.knn(dist.mat, k = nn) + object.fnn <- nn2( + data = dist.mat, + k = nn, + searchtype = 'standard', + eps = nn.eps + ) + alignment.score <- sapply( + X = 1:length(x = object@cell.names), + FUN = function(x) { + cell.id <- object@ident[x] + num.same.id <- length(x = which(x = object@ident[object.fnn$nn.idx[x, ]] == cell.id)) + } + ) + alignment.score <- 1 - ((mean(x = alignment.score) - nn / num.groups) / (nn - nn / num.groups)) + return(unname(obj = alignment.score)) } #' Calculate the ratio of variance explained by ICA or PCA to CCA diff --git a/R/utilities.R b/R/utilities.R index 1be939e20..951fdf91e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -710,8 +710,10 @@ MergeNode <- function(object, node.use, rebuild.tree = FALSE, ...) { #' @param reduction.use Dimensional reduction to use #' @param k k-param for k-nearest neighbor calculation. 30 by default #' @param do.log Whether to perform smoothing in log space. Default is false. +#' @param nn.eps Error bound when performing nearest neighbor seach using RANN; +#' default of 0.0 implies exact nearest neighbor search #' -#' @importFrom FNN get.knn +#' @importFrom RANN nn2 #' #' @export #' @@ -726,7 +728,8 @@ AddSmoothedScore <- function( reduction.use = "tsne", k = 30, do.log = FALSE, - do.print = FALSE + do.print = FALSE, + nn.eps = 0 ) { genes.fit <- SetIfNull(x = genes.fit, default = object@var.genes) genes.fit <- genes.fit[genes.fit %in% rownames(x = object@data)] @@ -737,31 +740,23 @@ AddSmoothedScore <- function( ) dim.codes <- paste0(dim.code, c(dim.1, dim.2)) data.plot <- FetchData(object = object, vars.all = dim.codes) - knn.smooth <- get.knn(data = data.plot, k = k)$nn.index - avg.fxn <- mean - if (! do.log) { - avg.fxn <- ExpMean - } - lasso.fits <- data.frame( - t( - x = sapply( - X = genes.fit, - FUN = function(g) { - return(unlist( - x = lapply( - X = 1:nrow(x = data.plot), - FUN = function(y) { - avg.fxn(as.numeric(x = object@data[g, knn.smooth[y, ]])) - } - ) - )) + # knn.smooth <- get.knn(data = data.plot, k = k)$nn.index + knn.smooth <- nn2(data = data.plot, k = k, searchtype = 'standard', eps = nn.eps)$nn.idx + avg.fxn <- ifelse(test = do.log, yes = mean, no = ExpMean) + lasso.fits <- data.frame(t(x = sapply( + X = genes.fit, + FUN = function(g) { + return(unlist(x = lapply( + X = 1:nrow(x = data.plot), + FUN = function(y) { + avg.fxn(as.numeric(x = object@data[g, knn.smooth[y, ]])) } - ) - ) - ) + ))) + } + ))) colnames(x = lasso.fits) <- rownames(x = data.plot) genes.old <- genes.fit[genes.fit %in% rownames(x = object@imputed)] - genes.new <- genes.fit[! (genes.fit %in% rownames(x = object@imputed))] + genes.new <- genes.fit[!genes.fit %in% rownames(x = object@imputed)] if (length(x = genes.old) > 0) { object@imputed[genes.old, ] <- lasso.fits[genes.old, ] } diff --git a/man/AddSmoothedScore.Rd b/man/AddSmoothedScore.Rd index b57147b1c..697821442 100644 --- a/man/AddSmoothedScore.Rd +++ b/man/AddSmoothedScore.Rd @@ -5,7 +5,8 @@ \title{Calculate smoothed expression values} \usage{ AddSmoothedScore(object, genes.fit = NULL, dim.1 = 1, dim.2 = 2, - reduction.use = "tsne", k = 30, do.log = FALSE, do.print = FALSE) + reduction.use = "tsne", k = 30, do.log = FALSE, do.print = FALSE, + nn.eps = 0) } \arguments{ \item{object}{Seurat object} @@ -24,6 +25,9 @@ AddSmoothedScore(object, genes.fit = NULL, dim.1 = 1, dim.2 = 2, \item{do.print}{Print progress (output the name of each gene after it has been imputed).} + +\item{nn.eps}{Error bound when performing nearest neighbor seach using RANN; +default of 0.0 implies exact nearest neighbor search} } \description{ Smooths expression values across the k-nearest neighbors based on dimensional reduction diff --git a/man/CalcAlignmentMetric.Rd b/man/CalcAlignmentMetric.Rd index cb8c78ad6..ca4925675 100644 --- a/man/CalcAlignmentMetric.Rd +++ b/man/CalcAlignmentMetric.Rd @@ -5,7 +5,7 @@ \title{Calculate an alignment score} \usage{ CalcAlignmentMetric(object, reduction.use = "cca.aligned", dims.use, - grouping.var, nn) + grouping.var, nn, nn.eps = 0) } \arguments{ \item{object}{Seurat object} @@ -18,6 +18,9 @@ Usually going to be cca.aligned.} \item{grouping.var}{Grouping variable used in the alignment.} \item{nn}{Number of neighbors to calculate in the NN graph construction} + +\item{nn.eps}{Error bound when performing nearest neighbor seach using RANN; +default of 0.0 implies exact nearest neighbor search} } \description{ Calculates an alignment score to determine how well aligned two (or more) diff --git a/tests/testthat/test_alignment.R b/tests/testthat/test_alignment.R index b78a1049a..2429686e4 100644 --- a/tests/testthat/test_alignment.R +++ b/tests/testthat/test_alignment.R @@ -21,7 +21,7 @@ test_that("Alignment returns expected values", { }) test_that("Alignment score calculated correctly", { - expect_equal(CalcAlignmentMetric(pbmc_cca, reduction.use = "cca.aligned", dims.use = 1:5, grouping.var = "group"), 0.625) + expect_equal(CalcAlignmentMetric(pbmc_cca, reduction.use = "cca.aligned", dims.use = 1:5, grouping.var = "group", nn = 5), 0.655) }) pbmc_cca <- CalcVarExpRatio(pbmc_cca, reduction.type = "pca", grouping.var = "group", dims.use = 1:5) From 917d675e021ed688e1c53275f17ededf4ece6ac1 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 17:04:44 -0400 Subject: [PATCH 09/15] Make VGAM a suggested package --- DESCRIPTION | 2 +- NAMESPACE | 2 -- R/differential_expression_internal.R | 8 ++++---- R/utilities_internal.R | 24 +++++++++++++++++++++++- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 50b6db21c..0fe2b2dd2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,6 @@ Imports: Rtsne, fpc, ape, - VGAM, pbapply, igraph, RANN, @@ -101,6 +100,7 @@ Collate: RoxygenNote: 6.0.1 Suggests: gdata, + VGAM, testthat, loomR, phateR, diff --git a/NAMESPACE b/NAMESPACE index 91d135689..7cd7a82e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -303,8 +303,6 @@ importFrom(ROCR,performance) importFrom(ROCR,prediction) importFrom(Rcpp,evalCpp) importFrom(Rtsne,Rtsne) -importFrom(VGAM,tobit) -importFrom(VGAM,vgam) importFrom(ape,as.phylo) importFrom(ape,drop.tip) importFrom(ape,nodelabels) diff --git a/R/differential_expression_internal.R b/R/differential_expression_internal.R index 77d1ca7e7..287cb4b14 100644 --- a/R/differential_expression_internal.R +++ b/R/differential_expression_internal.R @@ -94,14 +94,14 @@ DifferentialTobit <- function(x1, x2, lower = 1, upper = Inf) { #internal function to run Tobit DE test #credit to Cole Trapnell for this # -#' @importFrom VGAM vgam tobit #' @importFrom stats as.formula # -TobitFitter <- function(x, modelFormulaStr, lower = 1, upper = Inf){ +TobitFitter <- function(x, modelFormulaStr, lower = 1, upper = Inf) { + PackageCheck('VGAM') tryCatch( - expr = return(suppressWarnings(expr = vgam( + expr = return(suppressWarnings(expr = VGAM::vgam( formula = as.formula(object = modelFormulaStr), - family = tobit(Lower = lower, Upper = upper), + family = VGAM::tobit(Lower = lower, Upper = upper), data = x ))), #warning = function(w) { FM_fit }, diff --git a/R/utilities_internal.R b/R/utilities_internal.R index eb54f38e5..0f8cc6718 100644 --- a/R/utilities_internal.R +++ b/R/utilities_internal.R @@ -621,9 +621,31 @@ LengthCheck <- function(values, cutoff = 0) { # MaxN <- function(x, N = 2){ len <- length(x) - if(N > len) { + if (N > len) { warning('N greater than length(x). Setting N=length(x)') N <- length(x) } sort(x, partial = len - N + 1)[len - N + 1] } + +# Check the existence of a package +# +# @param ... Package names +# @param error If true, throw an error if the package doesn't exist +# +# @return Invisibly returns boolean denoting if the package is installed +# +#' @importFrom utils installed.packages +# +PackageCheck <- function(..., error = TRUE) { + pkgs <- unlist(x = c(...), use.names = FALSE) + package.installed <- pkgs %in% rownames(x = installed.packages()) + if (error && any(!package.installed)) { + stop( + "Cannot find ", + paste(pkgs[!package.installed], collapse = ', '), + "; please install" + ) + } + invisible(x = package.installed) +} From 603caf6eeed9948132aaa110c0889b3c220688c3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 25 Jun 2018 17:26:54 -0400 Subject: [PATCH 10/15] Make tclust a suggested package --- DESCRIPTION | 2 +- NAMESPACE | 1 - R/cluster_determination.R | 7 ++++--- man/DoKMeans.Rd | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0fe2b2dd2..069acba02 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,6 @@ Imports: reshape2, gplots, Rcpp (>= 0.11.0), - tclust, ranger, dtw, SDMTools, @@ -101,6 +100,7 @@ RoxygenNote: 6.0.1 Suggests: gdata, VGAM, + tclust, testthat, loomR, phateR, diff --git a/NAMESPACE b/NAMESPACE index 7cd7a82e4..9293dca11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -425,7 +425,6 @@ importFrom(stats,smooth.spline) importFrom(stats,t.test) importFrom(stats,var) importFrom(stats,wilcox.test) -importFrom(tclust,tkmeans) importFrom(tidyr,gather) importFrom(tidyr,separate) importFrom(tidyr,unite) diff --git a/R/cluster_determination.R b/R/cluster_determination.R index 2421ae9be..b64210c21 100644 --- a/R/cluster_determination.R +++ b/R/cluster_determination.R @@ -1,5 +1,6 @@ #' @include seurat.R NULL + #' Cluster Determination #' #' Identify clusters of cells by a shared nearest neighbor (SNN) modularity @@ -442,7 +443,7 @@ BuildRFClassifier <- function( #' K-Means Clustering #' -#' Perform k=means clustering on both genes and single cells +#' Perform k-means clustering on both genes and single cells #' #' K-means and heatmap are calculated on object@@scale.data #' @@ -464,7 +465,6 @@ BuildRFClassifier <- function( #' #' @importFrom methods new #' @importFrom stats kmeans -#' @importFrom tclust tkmeans #' #' @return Seurat object where the k-means results for genes is stored in #' object@@kmeans.obj[[1]], and the k-means results for cells is stored in @@ -506,7 +506,8 @@ DoKMeans <- function( kmeans.data <- data.use[genes.use, cells.use] if (do.constrained) { set.seed(seed = k.seed) - kmeans.obj <- tkmeans(x = kmeans.data, k = k.genes, ...) + PackageCheck('tclust') + kmeans.obj <- tclust::tkmeans(x = kmeans.data, k = k.genes, ...) } else { set.seed(seed = k.seed) kmeans.obj <- kmeans(x = kmeans.data, centers = k.genes, ...) diff --git a/man/DoKMeans.Rd b/man/DoKMeans.Rd index 07ba03700..d165805bf 100644 --- a/man/DoKMeans.Rd +++ b/man/DoKMeans.Rd @@ -44,7 +44,7 @@ object@kmeans.col[[1]]. The cluster for each cell is stored in object@meta.data[ and also object@ident (if set.ident=TRUE) } \description{ -Perform k=means clustering on both genes and single cells +Perform k-means clustering on both genes and single cells } \details{ K-means and heatmap are calculated on object@scale.data From c0238142f56aedc0109115ab8b1b4daa59e9874c Mon Sep 17 00:00:00 2001 From: timoast Date: Tue, 26 Jun 2018 10:52:54 -0400 Subject: [PATCH 11/15] subset raw.data when converting to sce --- R/conversion.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/conversion.R b/R/conversion.R index aa1181ac4..9a79c7118 100644 --- a/R/conversion.R +++ b/R/conversion.R @@ -157,9 +157,9 @@ Convert.seurat <- function( from@data <- as.matrix(from@data) } sce <- if (class(from@raw.data) %in% c("matrix", "dgTMatrix")) { - SingleCellExperiment::SingleCellExperiment(assays = list(counts = as(from@raw.data[, from@cell.names], "dgCMatrix"))) + SingleCellExperiment::SingleCellExperiment(assays = list(counts = as(from@raw.data[rownames(from@data), from@cell.names], "dgCMatrix"))) } else if (inherits(x = from@raw.data, what = "dgCMatrix")) { - SingleCellExperiment::SingleCellExperiment(assays = list(counts = from@raw.data[, from@cell.names])) + SingleCellExperiment::SingleCellExperiment(assays = list(counts = from@raw.data[rownames(from@data), from@cell.names])) } else { stop("Invalid class stored in seurat object's raw.data slot") } From 86b6a20e376669600c2d20fbc6371c610ba26a88 Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 26 Jun 2018 11:11:37 -0400 Subject: [PATCH 12/15] make caret and ranger suggested packages --- NAMESPACE | 3 --- R/cluster_determination.R | 5 ++--- R/cluster_validation.R | 8 ++++---- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9293dca11..02153c517 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -307,8 +307,6 @@ importFrom(ape,as.phylo) importFrom(ape,drop.tip) importFrom(ape,nodelabels) importFrom(ape,plot.phylo) -importFrom(caret,train) -importFrom(caret,trainControl) importFrom(cluster,clara) importFrom(cowplot,get_legend) importFrom(cowplot,plot_grid) @@ -375,7 +373,6 @@ importFrom(pbapply,pbapply) importFrom(pbapply,pblapply) importFrom(pbapply,pbsapply) importFrom(png,readPNG) -importFrom(ranger,ranger) importFrom(reshape2,melt) importFrom(reticulate,dict) importFrom(reticulate,import) diff --git a/R/cluster_determination.R b/R/cluster_determination.R index b64210c21..8e203b808 100644 --- a/R/cluster_determination.R +++ b/R/cluster_determination.R @@ -334,7 +334,6 @@ NumberClusters <- function(object) { #' #' @import Matrix #' @importFrom stats predict -#' @importFrom ranger ranger #' #' @export #' @@ -357,6 +356,7 @@ ClassifyCells <- function( new.data = NULL, ... ) { + PackageCheck('ranger') # build the classifier if (missing(classifier)){ classifier <- BuildRFClassifier( @@ -398,7 +398,6 @@ ClassifyCells <- function( #' @return Returns the random forest classifier #' #' @import Matrix -#' @importFrom ranger ranger #' #' @export #' @@ -431,7 +430,7 @@ BuildRFClassifier <- function( if (verbose) { message("Training Classifier ...") } - classifier <- ranger( + classifier <- ranger::ranger( data = training.data, dependent.variable.name = "class", classification = TRUE, diff --git a/R/cluster_validation.R b/R/cluster_validation.R index 4ea629757..d6c2308f2 100644 --- a/R/cluster_validation.R +++ b/R/cluster_validation.R @@ -13,7 +13,6 @@ NULL #' clusters #' @param acc.cutoff Accuracy cutoff for classifier #' @param verbose Controls whether to display progress and merging results -#' @importFrom caret trainControl train #' @return Returns a Seurat object, object@@ident has been updated with new #' cluster info #' @export @@ -35,6 +34,7 @@ ValidateClusters <- function( acc.cutoff = 0.9, verbose = TRUE ) { + PackageCheck('caret') # probably should refactor to make cleaner if (length(x = object@snn) > 1) { SNN.use <- object@snn @@ -133,7 +133,6 @@ ValidateClusters <- function( #' @param pc.use Which PCs to use for model construction #' @param top.genes Use the top X genes for model construction #' @param acc.cutoff Accuracy cutoff for classifier -#' @importFrom caret trainControl train #' @return Returns a Seurat object, object@@ident has been updated with #' new cluster info #' @export @@ -155,6 +154,7 @@ ValidateSpecificClusters <- function( top.genes = 30, acc.cutoff = 0.9 ) { + PackageCheck('caret') acc <- RunClassifier( object = object, group1 = cluster1, @@ -203,9 +203,9 @@ RunClassifier <- function(object, group1, group2, pcs, num.genes) { xv <- apply(X = x, MARGIN = 2, FUN = var) x <- x[, names(x = which(xv > 0))] # run k-fold cross validation - ctrl <- trainControl(method = "repeatedcv", repeats = 5) + ctrl <- caret::trainControl(method = "repeatedcv", repeats = 5) set.seed(seed = 1500) - model <- train( + model <- caret::train( x = x, y = as.factor(x = y), formula = as.factor(x = y) ~ ., From 3ae71c34964178bb80be6963b74ee87cc084931f Mon Sep 17 00:00:00 2001 From: Andrew Butler Date: Tue, 26 Jun 2018 11:14:25 -0400 Subject: [PATCH 13/15] add additional ranger check --- R/cluster_determination.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/cluster_determination.R b/R/cluster_determination.R index 8e203b808..2f10f6499 100644 --- a/R/cluster_determination.R +++ b/R/cluster_determination.R @@ -414,6 +414,7 @@ BuildRFClassifier <- function( verbose = TRUE, ... ) { + PackageCheck('ranger') training.classes <- as.vector(x = training.classes) training.genes <- SetIfNull( x = training.genes, From d65536a690bef37a369b8c9c56292bbbeb130ed1 Mon Sep 17 00:00:00 2001 From: Marcel Schilling Date: Wed, 27 Jun 2018 10:10:04 +0200 Subject: [PATCH 14/15] Transform `data` also for single cells in AverageExpression This fixes a discrepancy in AverageExpression: When `use.scale` and `use.raw` are both FALSE (default), identities with several cells were transformed back (exp(x) - 1) before averaging while single cell identities were neither averaged (no need) nor transformed (discrepancy). This commit fixes this by transforming the data data conditionally in the scenario described above. It uses `if(!(use.scale | use.raw))` assuming this would be faster to evaluate than `if(use.slot == "data")` or applying `mean` to a single value to safe the conditional altogether. This performance assumption, however, was not tested. fixes #571 --- R/utilities.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index 1be939e20..979aae9ba 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -592,6 +592,10 @@ AverageExpression <- function( genes.assay <- unique(x = intersect(x = genes.assay, y = rownames(x = data.use))) if (length(x = temp.cells) == 1) { data.temp <- (data.use[genes.assay, temp.cells]) + # transform data if needed (alternative: apply fxn.average to single value above) + if(!(use.scale | use.raw)) { # equivalent: slot.use == "data" + data.temp <- expm1(data.temp) + } } if (length(x = temp.cells) >1 ) { data.temp <- apply( From 59a3f64f61e6ae276d8179dc248f2ecabe8705f6 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 27 Jun 2018 10:33:24 -0400 Subject: [PATCH 15/15] Address #572: reorder idents after dropping levels --- R/interaction.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/interaction.R b/R/interaction.R index bb62745a0..7e88c3b8e 100644 --- a/R/interaction.R +++ b/R/interaction.R @@ -921,6 +921,8 @@ StashIdent <- function(object, save.name = "oldIdent") { #' #' @return A Seurat object where object@@ident has been appropriately modified #' +#' @importFrom stats reorder +#' #' @export #' #' @examples @@ -952,7 +954,7 @@ SetIdent <- function(object, cells.use = NULL, ident.use = NULL) { ) ) object@ident[cells.use] <- ident.use - object@ident <- droplevels(x = object@ident) + object@ident <- reorder(x = droplevels(x = object@ident)) return(object) }