Merge pull request #5502 from satijalab/develop

Seurat v4.1.0
satijalab · Jan 14, 2022 · f1b2593 · f1b2593
2 parents 8da35ba + 5656a64
commit f1b2593
Show file tree

Hide file tree

Showing 29 changed files with 712 additions and 61 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 4.0.6
-Date: 2021-12-16
+Version: 4.1.0
+Date: 2022-01-14
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(
@@ -65,7 +65,7 @@ Imports:
     Rtsne,
     scales,
     scattermore (>= 0.7),
-    sctransform (>= 0.3.2),
+    sctransform (>= 0.3.3),
     SeuratObject (>= 4.0.4),
     shiny,
     spatstat.core,
@@ -95,7 +95,7 @@ Collate:
     'tree.R'
     'utilities.R'
     'zzz.R'
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 Encoding: UTF-8
 Suggests:
     ape,

diff --git a/NAMESPACE b/NAMESPACE
@@ -16,6 +16,7 @@ S3method(FindClusters,Seurat)
 S3method(FindClusters,default)
 S3method(FindMarkers,Assay)
 S3method(FindMarkers,DimReduc)
+S3method(FindMarkers,SCTAssay)
 S3method(FindMarkers,Seurat)
 S3method(FindMarkers,default)
 S3method(FindNeighbors,Assay)
@@ -259,6 +260,7 @@ export(PolyDimPlot)
 export(PolyFeaturePlot)
 export(PredictAssay)
 export(PrepLDA)
+export(PrepSCTFindMarkers)
 export(PrepSCTIntegration)
 export(Project)
 export(ProjectDim)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,13 @@
+# Seurat 4.1.0 (2022-01-14)
+## Added
+- Add `raster.dpi` parameter to `DimPlot/FeaturePlot` to optionally rasterize individual points ([#5392](https://github.com/satijalab/seurat/pull/5392))
+- Add support for sctransform v2, differential expression on with SCT
+
+## Changes
+- Update `ReadParseBio` to support split-pipe 0.9.6p ([#5446](https://github.com/satijalab/seurat/pull/5446))
+- Fixes for MAST differential expression ([#5441](https://github.com/satijalab/seurat/issues/5441))
+- Fix scaling options when using `split.by` in `FeaturePlot()` ([#5243](https://github.com/satijalab/seurat/issues/5243)) 
+
 # Seurat 4.0.6 (2021-12-16)
 ## Added
 

diff --git a/R/convenience.R b/R/convenience.R
@@ -186,7 +186,7 @@ SpecificDimPlot <- function(object, ...) {
 ReadParseBio <- function(data.dir, ...) {
   mtx <- file.path(data.dir, "DGE.mtx")
   cells <- file.path(data.dir, "cell_metadata.csv")
-  features <- file.path(data.dir, "genes.csv")
+  features <- file.path(data.dir, "all_genes.csv")
   return(ReadMtx(
     mtx = mtx,
     cells = cells,

diff --git a/R/differential_expression.R b/R/differential_expression.R
@@ -680,6 +680,110 @@ FindMarkers.Assay <- function(
   return(de.results)
 }
 
+#' @param recorrect_umi Recalculate corrected UMI counts using minimum of the median UMIs when performing DE using multiple SCT objects; default is TRUE
+#'
+#' @rdname FindMarkers
+#' @concept differential_expression
+#' @export
+#' @method FindMarkers SCTAssay
+#'
+FindMarkers.SCTAssay <- function(
+  object,
+  slot = "data",
+  cells.1 = NULL,
+  cells.2 = NULL,
+  features = NULL,
+  logfc.threshold = 0.25,
+  test.use = 'wilcox',
+  min.pct = 0.1,
+  min.diff.pct = -Inf,
+  verbose = TRUE,
+  only.pos = FALSE,
+  max.cells.per.ident = Inf,
+  random.seed = 1,
+  latent.vars = NULL,
+  min.cells.feature = 3,
+  min.cells.group = 3,
+  pseudocount.use = 1,
+  mean.fxn = NULL,
+  fc.name = NULL,
+  base = 2,
+  densify = FALSE,
+  recorrect_umi = TRUE,
+  ...
+) {
+  data.slot <- ifelse(
+    test = test.use %in% DEmethods_counts(),
+    yes = 'counts',
+    no = slot
+  )
+  if (recorrect_umi && length(x = levels(x = object)) > 1) {
+    cell_attributes <- SCTResults(object = object, slot = "cell.attributes")
+    observed_median_umis <- lapply(
+      X = cell_attributes,
+      FUN = function(x) median(x[, "umi"])
+    )
+    model.list <- slot(object = object, "SCTModel.list")
+    median_umi.status <- lapply(X = model.list,
+                                FUN = function(x) { return(tryCatch(
+                                  expr = slot(object = x, name = 'median_umi'),
+                                  error = function(...) {return(NULL)})
+                                )})
+    if (any(is.null(unlist(median_umi.status)))){
+      stop("SCT assay does not contain median UMI information.",
+           "Run `PrepSCTFindMarkers()` before running `FindMarkers()` or invoke `FindMarkers(recorrect_umi=FALSE)`.")
+    }
+    model_median_umis <- SCTResults(object = object, slot = "median_umi")
+    min_median_umi <- min(unlist(x = observed_median_umis))
+    if (any(unlist(model_median_umis) != min_median_umi)){
+      stop("Object contains multiple models with unequal library sizes. Run `PrepSCTFindMarkers()` before running `FindMarkers()`.")
+    }
+  }
+
+  data.use <-  GetAssayData(object = object, slot = data.slot)
+  counts <- switch(
+    EXPR = data.slot,
+    'scale.data' = GetAssayData(object = object, slot = "counts"),
+    numeric()
+  )
+  fc.results <- FoldChange(
+    object = object,
+    slot = data.slot,
+    cells.1 = cells.1,
+    cells.2 = cells.2,
+    features = features,
+    pseudocount.use = pseudocount.use,
+    mean.fxn = mean.fxn,
+    fc.name = fc.name,
+    base = base
+  )
+  de.results <- FindMarkers(
+    object = data.use,
+    slot = data.slot,
+    counts = counts,
+    cells.1 = cells.1,
+    cells.2 = cells.2,
+    features = features,
+    logfc.threshold = logfc.threshold,
+    test.use = test.use,
+    min.pct = min.pct,
+    min.diff.pct = min.diff.pct,
+    verbose = verbose,
+    only.pos = only.pos,
+    max.cells.per.ident = max.cells.per.ident,
+    random.seed = random.seed,
+    latent.vars = latent.vars,
+    min.cells.feature = min.cells.feature,
+    min.cells.group = min.cells.group,
+    pseudocount.use = pseudocount.use,
+    fc.results = fc.results,
+    densify = densify,
+    ...
+  )
+  return(de.results)
+}
+
+
 #' @importFrom Matrix rowMeans
 #' @rdname FindMarkers
 #' @concept differential_expression
@@ -849,7 +953,7 @@ FindMarkers.Seurat <- function(
     cellnames.use <-  colnames(x = data.use)
   } else {
     data.use <- object[[reduction]]
-    cellnames.use <- rownames(data.use)
+    cellnames.use <- rownames(x = data.use)
   }
   cells <- IdentsToCells(
     object = object,
@@ -1709,7 +1813,7 @@ MASTDETest <- function(
     object = paste0(" ~ ", paste(latent.vars.names, collapse = "+"))
   )
   zlmCond <- MAST::zlm(formula = fmla, sca = sca, ...)
-  summaryCond <- summary(object = zlmCond, doLRT = 'conditionGroup2')
+  summaryCond <- MAST::summary(object = zlmCond, doLRT = 'conditionGroup2')
   summaryDt <- summaryCond$datatable
   # fcHurdle <- merge(
   #   summaryDt[contrast=='conditionGroup2' & component=='H', .(primerid, `Pr(>Chisq)`)], #hurdle P values
@@ -1877,6 +1981,146 @@ PerformDE <- function(
   return(de.results)
 }
 
+#' Prepare object to run differential expression on SCT assay with multiple models
+#'
+#' Given a merged object with multiple SCT models, this function uses minimum
+#' of the median UMI (calculated using the raw UMI counts) of individual objects
+#' to reverse the individual SCT regression model using minimum of median UMI
+#' as the sequencing depth covariate.
+#' The counts slot of the SCT assay is replaced with recorrected counts and
+#' the data slot is replaced with log1p of recorrected counts.
+#' @param object Seurat object with SCT assays
+#' @param assay Assay name where for SCT objects are stored; Default is 'SCT'
+#' @param verbose Print messages and progress
+#' @importFrom Matrix Matrix
+#' @importFrom sctransform correct_counts
+#'
+#' @return Returns a Seurat object with recorrected counts and data in the SCT assay.
+#' @export
+#'
+#' @concept differential_expression
+#' @examples
+#' data("pbmc_small")
+#' pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20)
+#' pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20)
+#' pbmc_merged <- merge(x = pbmc_small1, y = pbmc_small2)
+#' pbmc_merged <- PrepSCTFindMarkers(object = pbmc_merged)
+#' markers <- FindMarkers(
+#'   object = pbmc_merged,
+#'   ident.1 = "0",
+#'   ident.2 = "1",
+#'   assay = "SCT"
+#' )
+#' pbmc_subset <- subset(pbmc_merged, idents = c("0", "1"))
+#' markers_subset <- FindMarkers(
+#'   object = pbmc_subset,
+#'   ident.1 = "0",
+#'   ident.2 = "1",
+#'   assay = "SCT",
+#'   recorrect_umi = FALSE
+#' )
+#'
+PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) {
+  if (length(x = levels(x = object[[assay]])) == 1) {
+    if (verbose) {
+      message("Only one SCT model is stored - skipping recalculating corrected counts")
+    }
+    return(object)
+  }
+  observed_median_umis <- lapply(
+    X = SCTResults(object = object[[assay]], slot = "cell.attributes"),
+    FUN = function(x) median(x[, "umi"])
+  )
+  model.list <- slot(object = object[[assay]], name = "SCTModel.list")
+  median_umi.status <- lapply(X = model.list,
+                              FUN = function(x) { return(tryCatch(
+                                expr = slot(object = x, name = 'median_umi'),
+                                error = function(...) {return(NULL)})
+                              )})
+  if (any(is.null(x = unlist(x = median_umi.status)))){
+    # For old SCT objects  median_umi is set to median umi as calculated from obserbed UMIs
+    slot(object = object[[assay]], name = "SCTModel.list") <- lapply(X = model.list,
+                                                                     FUN = UpdateSlots)
+    SCTResults(object = object[[assay]], slot = "median_umi") <- observed_median_umis
+
+  }
+  model_median_umis <- SCTResults(object = object[[assay]], slot = "median_umi")
+  min_median_umi <- min(unlist(x = observed_median_umis))
+  if (all(unlist(x = model_median_umis) == min_median_umi)){
+    if (verbose){
+      message("Minimum UMI unchanged. Skipping re-correction.")
+    }
+    return(object)
+  }
+  if (verbose) {
+    message(paste0("Found ",
+                   length(x = levels(x = object[[assay]])),
+                   " SCT models.",
+                   " Recorrecting SCT counts using minimum median counts: ",
+                   min_median_umi))
+  }
+  umi.assay <- unique(
+    x = unlist(
+      x = SCTResults(object = object[[assay]], slot = "umi.assay")
+    )
+  )
+  if (length(x = umi.assay) > 1) {
+    stop("Multiple UMI assays are used for SCTransform: ",
+         paste(umi.assay, collapse = ", ")
+    )
+  }
+  raw_umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts")
+  corrected_counts <- Matrix(
+    nrow = nrow(x = raw_umi),
+    ncol = ncol(x = raw_umi),
+    data = 0,
+    dimnames = dimnames(x = raw_umi),
+    sparse = TRUE
+  )
+  cell_attr <- SCTResults(object = object[[assay]], slot = "cell.attributes")
+  model_pars_fit <- lapply(
+    X = SCTResults(object = object[[assay]], slot = "feature.attributes"),
+    FUN = function(x) x[, c("theta", "(Intercept)", "log_umi")]
+  )
+  arguments <- SCTResults(object = object[[assay]], slot = "arguments")
+  model_str <- SCTResults(object = object[[assay]], slot = "model")
+  set_median_umi <- rep(min_median_umi, length(levels(x = object[[assay]])))
+  names(set_median_umi) <- levels(x = object[[assay]])
+  set_median_umi <- as.list(set_median_umi)
+  # correct counts
+  for (model_name in levels(x = object[[assay]])) {
+    model_genes <- rownames(x = model_pars_fit[[model_name]])
+    x <- list(
+      model_str = model_str[[model_name]],
+      arguments = arguments[[model_name]],
+      model_pars_fit = as.matrix(x = model_pars_fit[[model_name]]),
+      cell_attr = cell_attr[[model_name]]
+    )
+    cells <- rownames(x = cell_attr[[model_name]])
+    umi <- raw_umi[model_genes, cells]
+
+    umi_corrected <- correct_counts(
+      x = x,
+      umi = umi,
+      verbosity = 0,
+      scale_factor = min_median_umi
+    )
+    corrected_counts[rownames(umi_corrected), colnames(umi_corrected)] <- umi_corrected
+  }
+  corrected_data <- log1p(corrected_counts)
+  suppressWarnings({object <- SetAssayData(object = object,
+                                           assay = assay,
+                                           slot = "counts",
+                                           new.data = corrected_counts)})
+  suppressWarnings({object <- SetAssayData(object = object,
+                                           assay = assay,
+                                           slot = "data",
+                                           new.data = corrected_data)})
+  SCTResults(object = object[[assay]], slot = "median_umi") <- set_median_umi
+
+  return(object)
+}
+
 # given a UMI count matrix, estimate NB theta parameter for each gene
 # and use fit of relationship with mean to assign regularized theta to each gene
 #

diff --git a/R/integration.R b/R/integration.R
@@ -1912,7 +1912,7 @@ LocalStruct <- function(
 #' }
 #'
 #' @importFrom rlang invoke
-#' 
+#'
 #' @export
 #' @concept integration
 #'