diff --git a/DESCRIPTION b/DESCRIPTION index b23f1f21c..56107bfc3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.1.1 -Date: 2022-05-01 +Version: 4.2.0 +Date: 2022-09-21 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details. Authors@R: c( @@ -48,7 +48,7 @@ Imports: leiden (>= 0.3.1), lmtest, MASS, - Matrix (>= 1.2-14), + Matrix (>= 1.5.0), matrixStats, miniUI, patchwork, @@ -65,8 +65,8 @@ Imports: Rtsne, scales, scattermore (>= 0.7), - sctransform (>= 0.3.3), - SeuratObject (>= 4.1.0), + sctransform (>= 0.3.4), + SeuratObject (>= 4.1.2), shiny, spatstat.core, spatstat.geom, @@ -74,7 +74,7 @@ Imports: tibble, tools, utils, - uwot (>= 0.1.9) + uwot (>= 0.1.14) LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: MIT + file LICENSE LazyData: true @@ -95,7 +95,7 @@ Collate: 'tree.R' 'utilities.R' 'zzz.R' -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.1 Encoding: UTF-8 Suggests: ape, diff --git a/NAMESPACE b/NAMESPACE index 20b61af05..373809068 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -521,7 +521,6 @@ importFrom(ggplot2,position_jitterdodge) importFrom(ggplot2,scale_alpha) importFrom(ggplot2,scale_alpha_ordinal) importFrom(ggplot2,scale_color_brewer) -importFrom(ggplot2,scale_color_discrete) importFrom(ggplot2,scale_color_distiller) importFrom(ggplot2,scale_color_gradient) importFrom(ggplot2,scale_color_gradientn) diff --git a/NEWS.md b/NEWS.md index 27201ac2e..27eb81bf8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,16 @@ +# Seurat 4.2.0 (2022-09-21) + +## Changes +- Fix legend color in 
`DoHeatmap()` ([#5783](https://github.com/satijalab/seurat/issues/5783)) +- Fix bug in `ScaleData()` when regressing out one gene ([#5970](https://github.com/satijalab/seurat/pull/5970)) +- Fix name pulling in `PlotPerturbScore()` ([#6081](https://github.com/satijalab/seurat/pull/6081)) +- Support spaceranger 2.0 ([#6208](https://github.com/satijalab/seurat/pull/6208)) +- Fix bug in `SpatialDimPlot()` when using `group.by` ([#6179](https://github.com/satijalab/seurat/issues/6179)) +- Add `add.noise` parameter in `VlnPlot()` +([#5756](https://github.com/satijalab/seurat/issues/5756)) +- Fix uwot model backwards compatibility ([#6345](https://github.com/satijalab/seurat/issues/6345)) +- Allow `pseudocount.use` in differential expression functions to be set at the `Assay` level + # Seurat 4.1.1 (2022-05-01) ## Changes diff --git a/R/clustering.R b/R/clustering.R index de0cc96c5..786a18fef 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -259,9 +259,7 @@ PredictAssay <- function( ) colnames(x = predicted) <- Cells(x = object) if (return.assay) { - # TODO: restore once check.matrix is implemented in SeuratObject - # predicted.assay <- CreateAssayObject(data = predicted, check.matrix = FALSE) - predicted.assay <- CreateAssayObject(data = predicted) + predicted.assay <- CreateAssayObject(data = predicted, check.matrix = FALSE) return (predicted.assay) } else { return (predicted) @@ -1689,7 +1687,7 @@ RunLeiden <- function( graph_from_adj_list(adjlist = object) } else if (inherits(x = object, what = c('dgCMatrix', 'matrix', 'Matrix'))) { if (inherits(x = object, what = 'Graph')) { - object <- as(object = object, Class = "dgCMatrix") + object <- as.sparse(x = object) } graph_from_adjacency_matrix(adjmatrix = object, weighted = TRUE) } else if (inherits(x = object, what = 'igraph')) { diff --git a/R/differential_expression.R b/R/differential_expression.R index aac07e55c..36aec77c6 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -59,7 +59,6 @@ 
FindAllMarkers <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -136,7 +135,6 @@ FindAllMarkers <- function( latent.vars = latent.vars, min.cells.feature = min.cells.feature, min.cells.group = min.cells.group, - pseudocount.use = pseudocount.use, mean.fxn = mean.fxn, fc.name = fc.name, base = base, @@ -512,6 +510,7 @@ FindMarkers.default <- function( densify = FALSE, ... ) { + pseudocount.use <- pseudocount.use %||% 1 ValidateCellGroups( object = object, cells.1 = cells.1, @@ -603,6 +602,9 @@ FindMarkers.default <- function( return(de.results) } +#' @param norm.method Normalization method for fold change calculation when +#' \code{slot} is \dQuote{\code{data}} +#' #' @rdname FindMarkers #' @concept differential_expression #' @export @@ -630,8 +632,10 @@ FindMarkers.Assay <- function( fc.name = NULL, base = 2, densify = FALSE, + norm.method = NULL, ... ) { + pseudocount.use <- pseudocount.use %||% 1 data.slot <- ifelse( test = test.use %in% DEmethods_counts(), yes = 'counts', @@ -652,7 +656,8 @@ FindMarkers.Assay <- function( pseudocount.use = pseudocount.use, mean.fxn = mean.fxn, fc.name = fc.name, - base = base + base = base, + norm.method = norm.method ) de.results <- FindMarkers( object = data.use, @@ -712,6 +717,7 @@ FindMarkers.SCTAssay <- function( recorrect_umi = TRUE, ... ) { + pseudocount.use <- pseudocount.use %||% 1 data.slot <- ifelse( test = test.use %in% DEmethods_counts(), yes = 'counts', @@ -813,6 +819,7 @@ FindMarkers.DimReduc <- function( ... 
) { + pseudocount.use <- pseudocount.use %||% 1 if (test.use %in% DEmethods_counts()) { stop("The following tests cannot be used for differential expression on a reduction as they assume a count model: ", paste(DEmethods_counts(), collapse=", ")) @@ -927,7 +934,6 @@ FindMarkers.Seurat <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -971,17 +977,14 @@ FindMarkers.Seurat <- function( } # check normalization method norm.command <- paste0("NormalizeData.", assay) - if (norm.command %in% Command(object = object) && is.null(x = reduction)) { - norm.method <- Command( + norm.method <- if (norm.command %in% Command(object = object) && is.null(x = reduction)) { + Command( object = object, command = norm.command, value = "normalization.method" ) - if (norm.method != "LogNormalize") { - mean.fxn <- function(x) { - return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) - } - } + } else { + NULL } de.results <- FindMarkers( object = data.use, @@ -1000,11 +1003,11 @@ FindMarkers.Seurat <- function( latent.vars = latent.vars, min.cells.feature = min.cells.feature, min.cells.group = min.cells.group, - pseudocount.use = pseudocount.use, mean.fxn = mean.fxn, base = base, fc.name = fc.name, densify = densify, + norm.method = norm.method, ... ) return(de.results) @@ -1050,7 +1053,9 @@ FoldChange.default <- function( return(fc.results) } - +#' @param norm.method Normalization method for mean function selection +#' when \code{slot} is \dQuote{\code{data}} +#' #' @importFrom Matrix rowMeans #' @rdname FoldChange #' @concept differential_expression @@ -1066,18 +1071,25 @@ FoldChange.Assay <- function( fc.name = NULL, mean.fxn = NULL, base = 2, + norm.method = NULL, ... 
) { + pseudocount.use <- pseudocount.use %||% 1 data <- GetAssayData(object = object, slot = slot) + default.mean.fxn <- function(x) { + return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + } mean.fxn <- mean.fxn %||% switch( EXPR = slot, - 'data' = function(x) { - return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) - }, + 'data' = switch( + EXPR = norm.method %||% '', + 'LogNormalize' = function(x) { + return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + }, + default.mean.fxn + ), 'scale.data' = rowMeans, - function(x) { - return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) - } + default.mean.fxn ) # Omit the decimal value of e from the column name if base == exp(1) base.text <- ifelse( @@ -1111,11 +1123,12 @@ FoldChange.DimReduc <- function( cells.2, features = NULL, slot = NULL, - pseudocount.use = NULL, + pseudocount.use = 1, fc.name = NULL, mean.fxn = NULL, ... ) { + pseudocount.use <- pseudocount.use %||% 1 mean.fxn <- mean.fxn %||% rowMeans fc.name <- fc.name %||% "avg_diff" data <- t(x = Embeddings(object = object)) @@ -1143,7 +1156,7 @@ FoldChange.DimReduc <- function( #' @param assay Assay to use in fold change calculation #' @param slot Slot to pull data from #' @param pseudocount.use Pseudocount to add to averaged expression values when -#' calculating logFC. 1 by default. +#' calculating logFC. #' @param mean.fxn Function to use for fold change or average difference calculation #' @param base The base with respect to which logarithms are computed. 
#' @param fc.name Name of the fold change, average difference, or custom function column @@ -1163,7 +1176,7 @@ FoldChange.Seurat <- function( slot = 'data', reduction = NULL, features = NULL, - pseudocount.use = 1, + pseudocount.use = NULL, mean.fxn = NULL, base = 2, fc.name = NULL, diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 581f35427..0c78ace99 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1379,6 +1379,10 @@ RunUMAP.default <- function( object = reduction.model, slot = "model" ) + # add num_precomputed_nns to = 3.3.0 slot @@ -490,7 +494,7 @@ HTOHeatmap <- function( #' @param same.y.lims Set all the y-axis limits to the same values #' @param log plot the feature axis on log scale #' @param ncol Number of columns if multiple plots are displayed -#' @param slot Use non-normalized counts data for plotting +#' @param slot Slot to pull expression data from (e.g. "counts" or "data") #' @param stack Horizontally stack plots for each feature #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} #' ggplot object. If \code{FALSE}, return a list of ggplot @@ -555,6 +559,7 @@ RidgePlot <- function( #' single violin shapes. #' @param adjust Adjust parameter for geom_violin #' @param flip flip plot orientation (identities on x-axis) +#' @param add.noise determine if adding a small noise for plotting #' @param raster Convert points to raster format. Requires 'ggrastr' to be installed. # default is \code{NULL} which automatically rasterizes if ggrastr is installed and # number of points exceed 100,000. @@ -593,6 +598,7 @@ VlnPlot <- function( combine = TRUE, fill.by = 'feature', flip = FALSE, + add.noise = TRUE, raster = NULL ) { if ( @@ -629,6 +635,7 @@ VlnPlot <- function( combine = combine, fill.by = fill.by, flip = flip, + add.noise = add.noise, raster = raster )) } @@ -5550,12 +5557,13 @@ Col2Hex <- function(...) 
{ # @param group.by Group (color) cells in different ways (for example, orig.ident) # @param split.by A variable to split the plot by # @param log plot Y axis on log scale -# @param slot Use non-normalized counts data for plotting +# @param slot Slot to pull expression data from (e.g. "counts" or "data") # @param stack Horizontally stack plots for multiple feature # @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} # ggplot object. If \code{FALSE}, return a list of ggplot objects # @param fill.by Color violins/ridges based on either 'feature' or 'ident' # @param flip flip plot orientation (identities on x-axis) +# @param add.noise determine if adding a small noise for plotting # @param raster Convert points to raster format, default is \code{NULL} which # automatically rasterizes if plotting more than 100,000 cells # @@ -5587,6 +5595,7 @@ ExIPlot <- function( combine = TRUE, fill.by = NULL, flip = FALSE, + add.noise = TRUE, raster = NULL ) { assay <- assay %||% DefaultAssay(object = object) @@ -5672,6 +5681,7 @@ ExIPlot <- function( pt.size = pt.size, log = log, fill.by = fill.by, + add.noise = add.noise, flip = flip )) } @@ -5689,6 +5699,7 @@ ExIPlot <- function( cols = cols, pt.size = pt.size, log = log, + add.noise = add.noise, raster = raster )) } @@ -6380,6 +6391,7 @@ MultiExIPlot <- function( seed.use = 42, log = FALSE, fill.by = NULL, + add.noise = TRUE, flip = NULL ) { if (!(fill.by %in% c("feature", "ident"))) { @@ -6447,6 +6459,9 @@ MultiExIPlot <- function( } else { noise <- rnorm(n = nrow(x = data)) / 100000 } + if (!add.noise) { + noise <- noise*0 + } for (f in unique(x = data$feature)) { if (all(data$expression[(data$feature == f)] == data$expression[(data$feature == f)][1])) { warning( @@ -7208,7 +7223,8 @@ SingleDimPlot <- function( #' @param pt.size Size of points for violin plots #' @param cols Colors to use for plotting #' @param seed.use Random seed to use. 
If NULL, don't set a seed -#' @param log plot Y axis on log scale +#' @param log plot Y axis on log10 scale +#' @param add.noise determine if adding small noise for plotting #' @param raster Convert points to raster format. Requires 'ggrastr' to be installed. #' default is \code{NULL} which automatically rasterizes if ggrastr is installed and #' number of points exceed 100,000. @@ -7238,6 +7254,7 @@ SingleExIPlot <- function( cols = NULL, seed.use = 42, log = FALSE, + add.noise = TRUE, raster = NULL ) { if (!is.null(x = raster) && isTRUE(x = raster)){ @@ -7282,6 +7299,9 @@ SingleExIPlot <- function( } else { noise <- rnorm(n = length(x = data[, feature])) / 100000 } + if (!add.noise) { + noise <- noise * 0 + } if (all(data[, feature] == data[, feature][1])) { warning(paste0("All cells have the same value of ", feature, ".")) } else{ @@ -7678,7 +7698,7 @@ SingleSpatialPlot <- function( colors <- DiscretePalette(length(unique(data[[col.by]])), palette = cols) scale <- scale_fill_manual(values = colors, na.value = na.value) } else { - cols <- cols[names(x = cols) %in% data$ident] + cols <- cols[names(x = cols) %in% data[[gsub(pattern = '`', replacement = "", x = col.by)]]] scale <- scale_fill_manual(values = cols, na.value = na.value) } plot <- plot + scale diff --git a/cran-comments.md b/cran-comments.md index 2e206a8e3..82439ff3d 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,10 +1,7 @@ -# Seurat v4.1.1 +# Seurat v4.2.0 ## Test environments * local Ubuntu 20.04 install, R 4.1.3 -* Ubuntu 16.04.6 (on travis-ci), R 4.0.0, R devel -* macOS 10.13.6 (on travis-ci), R 4.0.2 -* Windows Server 2012 R2 (on AppVeyor), R 4.1.0 Patched * win-builder (release, devel) ## R CMD check results diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 239c0a8c2..b06367ba3 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,8 +10,12 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { - 
c("lightgrey", "blue") }, + cols = if (blend) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "blue") + }, pt.size = NULL, order = FALSE, min.cutoff = NA, diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd index 95da7a4be..622474624 100644 --- a/man/FindAllMarkers.Rd +++ b/man/FindAllMarkers.Rd @@ -22,7 +22,6 @@ FindAllMarkers( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -113,9 +112,6 @@ of the two groups, currently only used for poisson and negative binomial tests} \item{min.cells.group}{Minimum number of cells in one of the groups} -\item{pseudocount.use}{Pseudocount to add to averaged expression values when -calculating logFC. 1 by default.} - \item{mean.fxn}{Function to use for fold change or average difference calculation. If NULL, the appropriate function will be chose according to the slot used} diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index 70805a8cc..160c69c0d 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -58,6 +58,7 @@ FindMarkers(object, ...) fc.name = NULL, base = 2, densify = FALSE, + norm.method = NULL, ... ) @@ -131,7 +132,6 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -243,6 +243,9 @@ slot "avg_diff".} \item{base}{The base with respect to which logarithms are computed.} +\item{norm.method}{Normalization method for fold change calculation when +\code{slot} is \dQuote{\code{data}}} + \item{recorrect_umi}{Recalculate corrected UMI counts using minimum of the median UMIs when performing DE using multiple SCT objects; default is TRUE} \item{ident.1}{Identity class to define markers for; pass an object of class diff --git a/man/FoldChange.Rd b/man/FoldChange.Rd index f4361bd26..edda396b5 100644 --- a/man/FoldChange.Rd +++ b/man/FoldChange.Rd @@ -22,6 +22,7 @@ FoldChange(object, ...) 
fc.name = NULL, mean.fxn = NULL, base = 2, + norm.method = NULL, ... ) @@ -31,7 +32,7 @@ FoldChange(object, ...) cells.2, features = NULL, slot = NULL, - pseudocount.use = NULL, + pseudocount.use = 1, fc.name = NULL, mean.fxn = NULL, ... @@ -47,7 +48,7 @@ FoldChange(object, ...) slot = "data", reduction = NULL, features = NULL, - pseudocount.use = 1, + pseudocount.use = NULL, mean.fxn = NULL, base = 2, fc.name = NULL, @@ -74,10 +75,13 @@ If NULL, use all features} \item{slot}{Slot to pull data from} \item{pseudocount.use}{Pseudocount to add to averaged expression values when -calculating logFC. 1 by default.} +calculating logFC.} \item{base}{The base with respect to which logarithms are computed.} +\item{norm.method}{Normalization method for mean function selection +when \code{slot} is \dQuote{\code{data}}} + \item{ident.1}{Identity class to calculate fold change for; pass an object of class \code{phylo} or 'clustertree' to calculate fold change for a node in a cluster tree; passing 'clustertree' requires \code{\link{BuildClusterTree}} to have been run} diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index c02543005..e08bd682e 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,10 +64,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
</div>}} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index c3f96ffa5..dc0469132 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,10 +75,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
</div>}} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 1eacd0ecd..a2b2fc588 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,10 +33,7 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/PrepSCTIntegration.Rd b/man/PrepSCTIntegration.Rd index ee1e15cac..d3fdecae4 100644 --- a/man/PrepSCTIntegration.Rd +++ b/man/PrepSCTIntegration.Rd @@ -80,7 +80,7 @@ anchors <- FindIntegrationAnchors( normalization.method = "SCT", anchor.features = features ) -pancreas.integrated <- IntegrateData(anchors) +pancreas.integrated <- IntegrateData(anchors, normalization.method = "SCT") } } diff --git a/man/RidgePlot.Rd b/man/RidgePlot.Rd index 153d2ba25..7825a99a5 100644 --- a/man/RidgePlot.Rd +++ b/man/RidgePlot.Rd @@ -47,7 +47,7 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{ncol}{Number of columns if multiple plots are displayed} -\item{slot}{Use non-normalized counts data for plotting} +\item{slot}{Slot to pull expression data from (e.g. 
"counts" or "data")} \item{stack}{Horizontally stack plots for each feature} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 9b3fc3749..351af75c9 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. } \section{Package options}{ diff --git a/man/SingleExIPlot.Rd b/man/SingleExIPlot.Rd index 9ceede508..636503c5d 100644 --- a/man/SingleExIPlot.Rd +++ b/man/SingleExIPlot.Rd @@ -16,6 +16,7 @@ SingleExIPlot( cols = NULL, seed.use = 42, log = FALSE, + add.noise = TRUE, raster = NULL ) } @@ -41,7 +42,9 @@ expression of the attribute being potted} \item{seed.use}{Random seed to use. If NULL, don't set a seed} -\item{log}{plot Y axis on log scale} +\item{log}{plot Y axis on log10 scale} + +\item{add.noise}{determine if adding small noise for plotting} \item{raster}{Convert points to raster format. Requires 'ggrastr' to be installed. 
default is \code{NULL} which automatically rasterizes if ggrastr is installed and diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 75a30f331..951fc2afc 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -25,6 +25,7 @@ VlnPlot( combine = TRUE, fill.by = "feature", flip = FALSE, + add.noise = TRUE, raster = NULL ) } @@ -59,7 +60,7 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{ncol}{Number of columns if multiple plots are displayed} -\item{slot}{Use non-normalized counts data for plotting} +\item{slot}{Slot to pull expression data from (e.g. "counts" or "data")} \item{split.plot}{plot each group of the split violin plots by multiple or single violin shapes.} @@ -73,6 +74,8 @@ ggplot object. If \code{FALSE}, return a list of ggplot} \item{flip}{flip plot orientation (identities on x-axis)} +\item{add.noise}{determine if adding a small noise for plotting} + \item{raster}{Convert points to raster format. Requires 'ggrastr' to be installed.} } \value{ diff --git a/man/reexports.Rd b/man/reexports.Rd index 2c9dbf1e5..4e5b1716e 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -68,6 +68,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. 
\describe{ - \item{SeuratObject}{\code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, 
\code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} + \item{SeuratObject}{\code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, 
\code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} diff --git a/tests/testthat/test_data_manipulation.R b/tests/testthat/test_data_manipulation.R index 17a715f4f..238f9a50d 100644 --- a/tests/testthat/test_data_manipulation.R +++ b/tests/testthat/test_data_manipulation.R @@ -91,7 +91,7 @@ test_that("Fast implementation of row scaling returns expected values", { expect_true(max(mat.clipped, na.rm = T) >= 0.2) }) -mat <- as(object = matrix(rnorm(100), nrow = 10, ncol = 10), Class = "dgCMatrix") +mat <- as.sparse(x = matrix(rnorm(100), nrow = 10, ncol = 10)) test_that("Row scaling with known stats works", { mat.rowmeans <- rowMeans(x = mat) @@ -147,7 +147,7 @@ test_that("Fast implementation of rbind returns expected values", { expect_equal(fcv[10,10], merged.mat[20,10]) }) -mat <- as(mat, "dgCMatrix") +mat <- as.sparse(mat) test_that("Fast implementation of ExpMean returns 
expected values",{ expect_equal(ExpMean(mat[1,]), FastExpMean(mat, display_progress = F)[1]) expect_equal(ExpMean(mat[5,]), FastExpMean(mat, display_progress = F)[5]) @@ -173,7 +173,7 @@ test_that("Fast implementation of LogVMR returns expected values", { test_that("Row variance calculations for sparse matrices work", { expect_equal(apply(X = mat, MARGIN = 1, FUN = var), SparseRowVar(mat = mat, display_progress = FALSE), tolerance = 1e-6) - expect_equal(apply(X = mat2, MARGIN = 1, FUN = var), SparseRowVar(mat = as(object = mat2, Class = "dgCMatrix"), display_progress = FALSE), tolerance = 1e-6) + expect_equal(apply(X = mat2, MARGIN = 1, FUN = var), SparseRowVar(mat = as.sparse(x = mat2), display_progress = FALSE), tolerance = 1e-6) }) # Tests for data structure manipulations diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 81e231f76..bf2e92448 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -2,12 +2,17 @@ suppressWarnings(RNGversion(vstr = "3.5.3")) set.seed(seed = 42) -# Tests for FindMarkers default parameters +# Tests for FindMarkers # -------------------------------------------------------------------------------- context("FindMarkers") +clr.obj <- suppressWarnings(NormalizeData(pbmc_small, normalization.method = "CLR")) +sct.obj <- suppressWarnings(suppressMessages(SCTransform(pbmc_small))) + markers.0 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1))) markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1))) +results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1))) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1))) test_that("Default settings work as expected", { 
expect_error(FindMarkers(object = pbmc_small)) @@ -29,6 +34,24 @@ test_that("Default settings work as expected", { expect_equal(markers.01[1, "p_val_adj"], 3.916481e-09) expect_equal(nrow(x = markers.01), 201) expect_equal(rownames(x = markers.01)[1], "TYMP") + + # CLR normalization + expect_equal(results.clr[1, "p_val"], 1.209462e-11) + expect_equal(results.clr[1, "avg_logFC"], -0.8290693, tolerance = 1e-6) + expect_equal(results.clr[1, "pct.1"], 0.111) + expect_equal(results.clr[1, "pct.2"], 0.96) + expect_equal(results.clr[1, "p_val_adj"], 2.781762e-09) + expect_equal(nrow(x = results.clr), 85) + expect_equal(rownames(x = results.clr)[1], "S100A8") + + # SCT normalization + expect_equal(results.sct[1, "p_val"], 6.225491e-11) + expect_equal(results.sct[1, "avg_logFC"], -0.6768721, tolerance = 1e-6) + expect_equal(results.sct[1, "pct.1"], 0.111) + expect_equal(results.sct[1, "pct.2"], 0.96) + expect_equal(results.sct[1, "p_val_adj"], 1.369608e-08) + expect_equal(nrow(x = results.sct), 92) + expect_equal(rownames(x = results.sct)[1], "TYMP") }) @@ -65,6 +88,28 @@ test_that("passing cell names works", { expect_equal(rownames(x = results)[1], "IFI30") }) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) +results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) +test_that("setting pseudocount.use works", { + expect_equal(nrow(x = results), 202) + expect_equal(results[1, "avg_logFC"], -2.630395, tolerance = 1e-6) + expect_equal(nrow(x = results.clr), 182) + expect_equal(results.clr[1, "avg_logFC"], -2.317338, tolerance = 1e-6) + expect_equal(nrow(results.sct), 185) + expect_equal(results.sct[1, "avg_logFC"], -1.845681, tolerance = 1e-6) +}) + 
+results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) +results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) +test_that("setting mean.fxn works", { + expect_equal(nrow(x = results), 191) + expect_equal(results[1, "avg_logFC"], -4.204346, tolerance = 1e-6) + expect_equal(results.clr[1, "avg_logFC"], -1.353025, tolerance = 1e-6) + expect_equal(results.sct[1, "avg_logFC"], -1.064042, tolerance = 1e-6) +}) + results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1))) test_that("logfc.threshold works", { expect_equal(nrow(x = results), 112) @@ -236,6 +281,51 @@ test_that("LR test works", { expect_equal(rownames(x = results)[1], "LYZ") }) +# Tests for FindAllMarkers +# ------------------------------------------------------------------------------- +results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small))) +results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj))) +results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj))) +results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1))) + +test_that("FindAllMarkers works as expected", { + expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) + expect_equal(results[1, "p_val"], 9.572778e-13) + expect_equal(results[1, "avg_log2FC"], -5.820829, tolerance = 1e-6) + expect_equal(results[1, "pct.1"], 0.083) + expect_equal(results[1, "pct.2"], 0.909) + expect_equal(results[1, "p_val_adj"], 2.201739e-10) + expect_equal(nrow(x = results), 222) + 
expect_equal(rownames(results)[1], "HLA-DPB1") + + # CLR normalization + expect_equal(results.clr[1, "p_val"], 1.209462e-11) + expect_equal(results.clr[1, "avg_log2FC"], -1.079924, tolerance = 1e-6) + expect_equal(results.clr[1, "pct.1"], 0.083) + expect_equal(results.clr[1, "pct.2"], 0.909) + expect_equal(results.clr[1, "p_val_adj"], 3.079373e-10) + expect_equal(nrow(x = results.clr), 200) + expect_equal(rownames(x = results.clr)[1], "HLA-DPB1") + + # SCT normalization + expect_equal(results.sct[1, "p_val"], 6.225491e-11) + expect_equal(results.sct[1, "avg_log2FC"], -1.265307, tolerance = 1e-6) + expect_equal(results.sct[1, "pct.1"], 0.167) + expect_equal(results.sct[1, "pct.2"], 0.909) + expect_equal(results.sct[1, "p_val_adj"], 1.369608e-08) + expect_equal(nrow(x = results.sct), 201) + expect_equal(rownames(x = results.sct)[1], "HLA-DPB1") + + # pseudocount.use = 0.1 + expect_equal(results.pseudo[1, "p_val"], 9.572778e-13) + expect_equal(results.pseudo[1, "avg_log2FC"], -6.013818, tolerance = 1e-6) + expect_equal(results.pseudo[1, "pct.1"], 0.083) + expect_equal(results.pseudo[1, "pct.2"], 0.909) + expect_equal(results.pseudo[1, "p_val_adj"], 2.201739e-10) + expect_equal(nrow(x = results.pseudo), 222) + expect_equal(rownames(results.pseudo)[1], "HLA-DPB1") +}) + # Tests for FindConservedMarkers # ------------------------------------------------------------------------------- diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 89474010e..d84ef3bdd 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -2,7 +2,7 @@ set.seed(42) pbmc.file <- system.file('extdata', 'pbmc_raw.txt', package = 'Seurat') -pbmc.test <- as(as.matrix(read.table(pbmc.file, sep = "\t", row.names = 1)), "dgCMatrix") +pbmc.test <- as.sparse(x = as.matrix(read.table(pbmc.file, sep = "\t", row.names = 1))) # Tests for object creation (via CreateSeuratObject) # 
-------------------------------------------------------------------------------- diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 6542d8c17..079634451 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -1,7 +1,7 @@ set.seed(42) pbmc.file <- system.file('extdata', 'pbmc_raw.txt', package = 'Seurat') -pbmc.test <- as(as.matrix(read.table(pbmc.file, sep = "\t", row.names = 1)), "dgCMatrix") +pbmc.test <- as.sparse(x = as.matrix(read.table(pbmc.file, sep = "\t", row.names = 1))) meta.data <- data.frame( a = rep(as.factor(c('a', 'b', 'c')), length.out = ncol(pbmc.test)), diff --git a/vignettes/integration_rpca.Rmd b/vignettes/integration_rpca.Rmd index e0f6c96e2..d1ae0d266 100644 --- a/vignettes/integration_rpca.Rmd +++ b/vignettes/integration_rpca.Rmd @@ -37,7 +37,7 @@ By identifying shared sources of variation between datasets, CCA is well-suited RPCA-based integration runs significantly faster, and also represents a more conservative approach where cells in different biological states are less likely to 'align' after integration. We therefore,recommend RPCA during integrative analysis where: * A substantial fraction of cells in one dataset have no matching type in the other * Datasets originate from the same platform (i.e. multiple lanes of 10x genomics) -* There are a large number of datasets or cells to integrate (see INSERT LINK for more tips on integrating large datasets) +* There are a large number of datasets or cells to integrate (see [here](integration_large_datasets.html) for more tips on integrating large datasets) Below, we demonstrate the use of reciprocal PCA to align the same stimulated and resting datasets first analyzed in our [introduction to scRNA-seq integration](integration_introduction.html) vignette. While the list of commands is nearly identical, this workflow requires users to run principal components analysis (PCA) individually on each dataset prior to integration. 
Users should also set the 'reduction' argument to 'rpca', when running `FindIntegrationAnchors()`.