Skip to content

Commit

Permalink
Merge pull request #5502 from satijalab/develop
Browse files Browse the repository at this point in the history
Seurat v4.1.0
  • Loading branch information
mojaveazure committed Jan 14, 2022
2 parents 8da35ba + 5656a64 commit f1b2593
Show file tree
Hide file tree
Showing 29 changed files with 712 additions and 61 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: Seurat
Version: 4.0.6
Date: 2021-12-16
Version: 4.1.0
Date: 2022-01-14
Title: Tools for Single Cell Genomics
Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
Authors@R: c(
Expand Down Expand Up @@ -65,7 +65,7 @@ Imports:
Rtsne,
scales,
scattermore (>= 0.7),
sctransform (>= 0.3.2),
sctransform (>= 0.3.3),
SeuratObject (>= 4.0.4),
shiny,
spatstat.core,
Expand Down Expand Up @@ -95,7 +95,7 @@ Collate:
'tree.R'
'utilities.R'
'zzz.R'
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
Encoding: UTF-8
Suggests:
ape,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -16,6 +16,7 @@ S3method(FindClusters,Seurat)
S3method(FindClusters,default)
S3method(FindMarkers,Assay)
S3method(FindMarkers,DimReduc)
S3method(FindMarkers,SCTAssay)
S3method(FindMarkers,Seurat)
S3method(FindMarkers,default)
S3method(FindNeighbors,Assay)
Expand Down Expand Up @@ -259,6 +260,7 @@ export(PolyDimPlot)
export(PolyFeaturePlot)
export(PredictAssay)
export(PrepLDA)
export(PrepSCTFindMarkers)
export(PrepSCTIntegration)
export(Project)
export(ProjectDim)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
@@ -1,3 +1,13 @@
# Seurat 4.1.0 (2022-01-14)
## Added
- Add `raster.dpi` parameter to `DimPlot/FeaturePlot` to optionally rasterize individual points ([#5392](https://github.com/satijalab/seurat/pull/5392))
- Add support for sctransform v2, differential expression on with SCT

## Changes
- Update `ReadParseBio` to support split-pipe 0.9.6p ([#5446](https://github.com/satijalab/seurat/pull/5446))
- Fixes for MAST differential expression ([#5441](https://github.com/satijalab/seurat/issues/5441))
- Fix scaling options when using `split.by` in `FeaturePlot()` ([#5243](https://github.com/satijalab/seurat/issues/5243))

# Seurat 4.0.6 (2021-12-16)
## Added

Expand Down
2 changes: 1 addition & 1 deletion R/convenience.R
Expand Up @@ -186,7 +186,7 @@ SpecificDimPlot <- function(object, ...) {
ReadParseBio <- function(data.dir, ...) {
mtx <- file.path(data.dir, "DGE.mtx")
cells <- file.path(data.dir, "cell_metadata.csv")
features <- file.path(data.dir, "genes.csv")
features <- file.path(data.dir, "all_genes.csv")
return(ReadMtx(
mtx = mtx,
cells = cells,
Expand Down
248 changes: 246 additions & 2 deletions R/differential_expression.R
Expand Up @@ -680,6 +680,110 @@ FindMarkers.Assay <- function(
return(de.results)
}

#' @param recorrect_umi Recalculate corrected UMI counts using minimum of the median UMIs when performing DE using multiple SCT objects; default is TRUE
#'
#' @rdname FindMarkers
#' @concept differential_expression
#' @export
#' @method FindMarkers SCTAssay
#'
FindMarkers.SCTAssay <- function(
object,
slot = "data",
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
logfc.threshold = 0.25,
test.use = 'wilcox',
min.pct = 0.1,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
mean.fxn = NULL,
fc.name = NULL,
base = 2,
densify = FALSE,
recorrect_umi = TRUE,
...
) {
data.slot <- ifelse(
test = test.use %in% DEmethods_counts(),
yes = 'counts',
no = slot
)
if (recorrect_umi && length(x = levels(x = object)) > 1) {
cell_attributes <- SCTResults(object = object, slot = "cell.attributes")
observed_median_umis <- lapply(
X = cell_attributes,
FUN = function(x) median(x[, "umi"])
)
model.list <- slot(object = object, "SCTModel.list")
median_umi.status <- lapply(X = model.list,
FUN = function(x) { return(tryCatch(
expr = slot(object = x, name = 'median_umi'),
error = function(...) {return(NULL)})
)})
if (any(is.null(unlist(median_umi.status)))){
stop("SCT assay does not contain median UMI information.",
"Run `PrepSCTFindMarkers()` before running `FindMarkers()` or invoke `FindMarkers(recorrect_umi=FALSE)`.")
}
model_median_umis <- SCTResults(object = object, slot = "median_umi")
min_median_umi <- min(unlist(x = observed_median_umis))
if (any(unlist(model_median_umis) != min_median_umi)){
stop("Object contains multiple models with unequal library sizes. Run `PrepSCTFindMarkers()` before running `FindMarkers()`.")
}
}

data.use <- GetAssayData(object = object, slot = data.slot)
counts <- switch(
EXPR = data.slot,
'scale.data' = GetAssayData(object = object, slot = "counts"),
numeric()
)
fc.results <- FoldChange(
object = object,
slot = data.slot,
cells.1 = cells.1,
cells.2 = cells.2,
features = features,
pseudocount.use = pseudocount.use,
mean.fxn = mean.fxn,
fc.name = fc.name,
base = base
)
de.results <- FindMarkers(
object = data.use,
slot = data.slot,
counts = counts,
cells.1 = cells.1,
cells.2 = cells.2,
features = features,
logfc.threshold = logfc.threshold,
test.use = test.use,
min.pct = min.pct,
min.diff.pct = min.diff.pct,
verbose = verbose,
only.pos = only.pos,
max.cells.per.ident = max.cells.per.ident,
random.seed = random.seed,
latent.vars = latent.vars,
min.cells.feature = min.cells.feature,
min.cells.group = min.cells.group,
pseudocount.use = pseudocount.use,
fc.results = fc.results,
densify = densify,
...
)
return(de.results)
}


#' @importFrom Matrix rowMeans
#' @rdname FindMarkers
#' @concept differential_expression
Expand Down Expand Up @@ -849,7 +953,7 @@ FindMarkers.Seurat <- function(
cellnames.use <- colnames(x = data.use)
} else {
data.use <- object[[reduction]]
cellnames.use <- rownames(data.use)
cellnames.use <- rownames(x = data.use)
}
cells <- IdentsToCells(
object = object,
Expand Down Expand Up @@ -1709,7 +1813,7 @@ MASTDETest <- function(
object = paste0(" ~ ", paste(latent.vars.names, collapse = "+"))
)
zlmCond <- MAST::zlm(formula = fmla, sca = sca, ...)
summaryCond <- summary(object = zlmCond, doLRT = 'conditionGroup2')
summaryCond <- MAST::summary(object = zlmCond, doLRT = 'conditionGroup2')
summaryDt <- summaryCond$datatable
# fcHurdle <- merge(
# summaryDt[contrast=='conditionGroup2' & component=='H', .(primerid, `Pr(>Chisq)`)], #hurdle P values
Expand Down Expand Up @@ -1877,6 +1981,146 @@ PerformDE <- function(
return(de.results)
}

#' Prepare object to run differential expression on SCT assay with multiple models
#'
#' Given a merged object with multiple SCT models, this function uses minimum
#' of the median UMI (calculated using the raw UMI counts) of individual objects
#' to reverse the individual SCT regression model using minimum of median UMI
#' as the sequencing depth covariate.
#' The counts slot of the SCT assay is replaced with recorrected counts and
#' the data slot is replaced with log1p of recorrected counts.
#' @param object Seurat object with SCT assays
#' @param assay Assay name where for SCT objects are stored; Default is 'SCT'
#' @param verbose Print messages and progress
#' @importFrom Matrix Matrix
#' @importFrom sctransform correct_counts
#'
#' @return Returns a Seurat object with recorrected counts and data in the SCT assay.
#' @export
#'
#' @concept differential_expression
#' @examples
#' data("pbmc_small")
#' pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20)
#' pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20)
#' pbmc_merged <- merge(x = pbmc_small1, y = pbmc_small2)
#' pbmc_merged <- PrepSCTFindMarkers(object = pbmc_merged)
#' markers <- FindMarkers(
#' object = pbmc_merged,
#' ident.1 = "0",
#' ident.2 = "1",
#' assay = "SCT"
#' )
#' pbmc_subset <- subset(pbmc_merged, idents = c("0", "1"))
#' markers_subset <- FindMarkers(
#' object = pbmc_subset,
#' ident.1 = "0",
#' ident.2 = "1",
#' assay = "SCT",
#' recorrect_umi = FALSE
#' )
#'
PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) {
if (length(x = levels(x = object[[assay]])) == 1) {
if (verbose) {
message("Only one SCT model is stored - skipping recalculating corrected counts")
}
return(object)
}
observed_median_umis <- lapply(
X = SCTResults(object = object[[assay]], slot = "cell.attributes"),
FUN = function(x) median(x[, "umi"])
)
model.list <- slot(object = object[[assay]], name = "SCTModel.list")
median_umi.status <- lapply(X = model.list,
FUN = function(x) { return(tryCatch(
expr = slot(object = x, name = 'median_umi'),
error = function(...) {return(NULL)})
)})
if (any(is.null(x = unlist(x = median_umi.status)))){
# For old SCT objects median_umi is set to median umi as calculated from obserbed UMIs
slot(object = object[[assay]], name = "SCTModel.list") <- lapply(X = model.list,
FUN = UpdateSlots)
SCTResults(object = object[[assay]], slot = "median_umi") <- observed_median_umis

}
model_median_umis <- SCTResults(object = object[[assay]], slot = "median_umi")
min_median_umi <- min(unlist(x = observed_median_umis))
if (all(unlist(x = model_median_umis) == min_median_umi)){
if (verbose){
message("Minimum UMI unchanged. Skipping re-correction.")
}
return(object)
}
if (verbose) {
message(paste0("Found ",
length(x = levels(x = object[[assay]])),
" SCT models.",
" Recorrecting SCT counts using minimum median counts: ",
min_median_umi))
}
umi.assay <- unique(
x = unlist(
x = SCTResults(object = object[[assay]], slot = "umi.assay")
)
)
if (length(x = umi.assay) > 1) {
stop("Multiple UMI assays are used for SCTransform: ",
paste(umi.assay, collapse = ", ")
)
}
raw_umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts")
corrected_counts <- Matrix(
nrow = nrow(x = raw_umi),
ncol = ncol(x = raw_umi),
data = 0,
dimnames = dimnames(x = raw_umi),
sparse = TRUE
)
cell_attr <- SCTResults(object = object[[assay]], slot = "cell.attributes")
model_pars_fit <- lapply(
X = SCTResults(object = object[[assay]], slot = "feature.attributes"),
FUN = function(x) x[, c("theta", "(Intercept)", "log_umi")]
)
arguments <- SCTResults(object = object[[assay]], slot = "arguments")
model_str <- SCTResults(object = object[[assay]], slot = "model")
set_median_umi <- rep(min_median_umi, length(levels(x = object[[assay]])))
names(set_median_umi) <- levels(x = object[[assay]])
set_median_umi <- as.list(set_median_umi)
# correct counts
for (model_name in levels(x = object[[assay]])) {
model_genes <- rownames(x = model_pars_fit[[model_name]])
x <- list(
model_str = model_str[[model_name]],
arguments = arguments[[model_name]],
model_pars_fit = as.matrix(x = model_pars_fit[[model_name]]),
cell_attr = cell_attr[[model_name]]
)
cells <- rownames(x = cell_attr[[model_name]])
umi <- raw_umi[model_genes, cells]

umi_corrected <- correct_counts(
x = x,
umi = umi,
verbosity = 0,
scale_factor = min_median_umi
)
corrected_counts[rownames(umi_corrected), colnames(umi_corrected)] <- umi_corrected
}
corrected_data <- log1p(corrected_counts)
suppressWarnings({object <- SetAssayData(object = object,
assay = assay,
slot = "counts",
new.data = corrected_counts)})
suppressWarnings({object <- SetAssayData(object = object,
assay = assay,
slot = "data",
new.data = corrected_data)})
SCTResults(object = object[[assay]], slot = "median_umi") <- set_median_umi

return(object)
}

# given a UMI count matrix, estimate NB theta parameter for each gene
# and use fit of relationship with mean to assign regularized theta to each gene
#
Expand Down
2 changes: 1 addition & 1 deletion R/integration.R
Expand Up @@ -1912,7 +1912,7 @@ LocalStruct <- function(
#' }
#'
#' @importFrom rlang invoke
#'
#'
#' @export
#' @concept integration
#'
Expand Down

0 comments on commit f1b2593

Please sign in to comment.